diff --git a/src/project_layouts/README.md b/src/project_layouts/README.md new file mode 100644 index 00000000..2d52f366 --- /dev/null +++ b/src/project_layouts/README.md @@ -0,0 +1,20 @@ + +project_layout information +========================== + +project layouts don't show as available unless referenced by an entry in a layout_ref folder within the src/decktemplates structure. + +This is because some layouts may be intended for use with child projects generated from 'deck project.new' - but not directly by this parent project. + +The structure of decktemplates and project_layouts is designed to avoid circular dependencies arising during creation of layouts for child projects - that may then need to in turn support the same layout if the child project creates projects. + +Layouts are not included in the resource files of modules for the same reason. (modules may need to be included in layouts) +Instead they are pointed to via the decktemplates custom & vendor structure. + +-------- + +layouts within project_layouts/vendor should generally not be customised directly +The vendor layouts should be created and updated by loading the appropriate vendor modules. +These are plugin modules that are providers of the punk.projectlayout capability +(see punk::cap module documentation and the project_layouts/vendor/punk/sample-0.1 layout) + diff --git a/src/project_layouts/custom/_project/punk.basic/src/build.tcl b/src/project_layouts/custom/_project/punk.basic/src/build.tcl new file mode 100644 index 00000000..734ccb87 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.basic/src/build.tcl @@ -0,0 +1,6 @@ +#!/bin/sh +# -*- tcl -*- \ +# 'build.tcl' name as required by kettle +# Can be run directly - but also using `deck Kettle ...` or `deck KettleShell ...`\ +exec ./kettle -f "$0" "${1+$@}" +kettle doc diff --git a/src/project_layouts/custom/_project/punk.basic/src/make.tcl b/src/project_layouts/custom/_project/punk.basic/src/make.tcl new file mode 100644 index 00000000..e25031ed --- /dev/null +++ b/src/project_layouts/custom/_project/punk.basic/src/make.tcl @@ -0,0 +1,1149 @@ +# tcl +# +#make any tclkits and modules in src and place them and associated data files/scripts in the parent folder of src. +#e.g in 'bin' and 'modules' folders at same level as 'src' folder. + +set hashline "# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###" +puts $hashline +puts " punkshell make script " +puts $hashline\n +namespace eval ::punkmake { + variable scriptfolder [file normalize [file dirname [info script]]] + variable foldername [file tail $scriptfolder] + variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] + variable non_help_flags [list -k] + variable help_flags [list -help --help /?] + variable known_commands [list project get-project-info shell vendorupdate bootsupport] +} +if {"::try" ni [info commands ::try]} { + puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" + exit 1 +} + +#------------------------------------------------------------------------------ +#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#------------------------------------------------------------------------------ +#If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files +# - then it will attempt to preference these modules +# This allows a source update via 'fossil update' 'git pull' etc to pull in a minimal set of support modules for the make script +# and load these in preference to ones that may have been in the interp's tcl::tm::list or auto_path due to environment variables +set startdir [pwd] +if {[file exists [file join $startdir src bootsupport]]} { + set bootsupport_mod [file join $startdir src bootsupport modules] + set bootsupport_lib [file join $startdir src bootsupport lib] +} else { + set bootsupport_mod [file join $startdir bootsupport modules] + set bootsupport_lib [file join $startdir bootsupport lib] +} +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + + set original_tm_list [tcl::tm::list] + tcl::tm::remove {*}$original_tm_list + set original_auto_path $::auto_path + set ::auto_path [list $bootsupport_lib] + + set support_modules [glob -nocomplain -dir $bootsupport_mod -type f -tail *.tm] + set tcl_core_packages [list tcl::zlib zlib tcl::oo TclOO tcl::tommath tcl::zipfs Tcl Tk] ;#packages we + if {[llength $support_modules] || [llength [glob -nocomplain -dir $bootsupport_lib -tail *]]} { + #only forget all *unloaded* package names + foreach pkg [package names] { + if {$pkg in $tcl_core_packages} { + continue + } + if {![llength [package versions $pkg]]} { + #puts stderr "Got no versions for pkg $pkg" + continue + } + if {![string length [package provide $pkg]]} { + #no returned version indicates it wasn't loaded - so we can forget its index + package forget $pkg + } + } + tcl::tm::add $bootsupport_mod + } + + + if {[file exists [pwd]/modules]} { + tcl::tm::add [pwd]/modules + } + + #package require Thread + # - the full repl requires Threading and punk,shellfilter,shellrun to call and display properly. + + + # tm list already indexed - need 'package forget' to find modules based on current tcl::tm::list + #These are strong dependencies + package forget punk::mix + package require punk::mix + package forget punk::repo + package require punk::repo + package forget punkcheck + package require punkcheck + + + + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] + #------------------------------------------------------------------------------ +} + +# ** *** *** *** *** *** *** *** *** *** *** *** +#*temporarily* hijack package command +# ** *** *** *** *** *** *** *** *** *** *** *** +try { + rename ::package ::punkmake::package_temp_aside + proc ::package {args} { + if {[lindex $args 0] eq "require"} { + lappend ::punkmake::pkg_requirements [lindex $args 1] + } + } + package require punk::mix + package require punk::repo +} finally { + catch {rename ::package ""} + catch {rename ::punkmake::package_temp_aside ::package} +} +# ** *** *** *** *** *** *** *** *** *** *** *** +foreach pkg $::punkmake::pkg_requirements { + if {[catch {package require $pkg} errM]} { + puts stderr "missing pkg: $pkg" + lappend ::punkmake::pkg_missing $pkg + } else { + lappend ::punkmake::pkg_loaded $pkg + } +} + + + + + +proc punkmake_gethelp {args} { + set scriptname [file tail [info script]] + append h "Usage:" \n + append h "" \n + append h " $scriptname -help or $scriptname --help or $scriptname /? or just $scriptname" \n + append h " - This help." \n \n + append h " $scriptname project ?-k?" \n + append h " - this is the literal word project - and confirms you want to run the project build" \n + append h " - the optional -k flag will terminate processes running as the executable being built (if applicable)" \n \n + append h " $scriptname bootsupport" \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " $scriptname vendorupdate" \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " $scriptname get-project-info" \n + append h " - show the name and base folder of the project to be built" \n + append h "" \n + if {[llength $::punkmake::pkg_missing]} { + append h "* ** NOTE ** ***" \n + append h " punkmake has detected that the following packages could not be loaded:" \n + append h " " [join $::punkmake::pkg_missing "\n "] \n + append h "* ** *** *** ***" \n + append h " These packages are required for punk make to function" \n \n + append h "* ** *** *** ***" \n\n + append h "Successfully Loaded packages:" \n + append h " " [join $::punkmake::pkg_loaded "\n "] \n + } + return $h +} +set scriptargs $::argv +set do_help 0 +if {![llength $scriptargs]} { + set do_help 1 +} else { + foreach h $::punkmake::help_flags { + if {[lsearch $scriptargs $h] >= 0} { + set do_help 1 + break + } + } +} +set commands_found [list] +foreach a $scriptargs { + if {![string match -* $a]} { + lappend commands_found $a + } else { + if {$a ni $::punkmake::non_help_flags} { + set do_help 1 + } + } +} +if {[llength $commands_found] != 1 } { + set do_help 1 +} elseif {[lindex $commands_found 0] ni $::punkmake::known_commands} { + puts stderr "Unknown command: [lindex $commands_found 0]\n\n" + set do_help 1 +} +if {$do_help} { + puts stderr [punkmake_gethelp] + exit 0 +} + +set ::punkmake::command [lindex $commands_found 0] + + +if {[lsearch $::argv -k] >= 0} { + set forcekill 1 +} else { + set forcekill 0 +} +#puts stdout "::argv $::argv" +# ---------------------------------------- + +set scriptfolder $::punkmake::scriptfolder + + + +#first look for a project root (something under fossil or git revision control AND matches punk project folder structure) +#If that fails - just look for a 'project shaped folder' ie meets minimum requirements of /src /src/lib /src/modules /lib /modules +if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} { + if {![string length [set projectroot [punk::repo::find_candidate $scriptfolder]]]} { + puts stderr "punkmake script unable to determine an approprite project root at or above the path '$scriptfolder' ensure the make script is within a project folder structure" + puts stderr " -aborted- " + exit 2 + #todo? + #ask user for a project name and create basic structure? + #call punk::mix::cli::new $projectname on parent folder? + } else { + puts stderr "WARNING punkmake script operating in project space that is not under version control" + } +} else { + +} + +set sourcefolder $projectroot/src + +if {$::punkmake::command eq "get-project-info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- -- get-project-info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- projectroot : $projectroot" + if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { + set vc "fossil" + set rev [punk::repo::fossil_revision $scriptfolder] + set rem [punk::repo::fossil_remote $scriptfolder] + } elseif {[punk::repo::find_git $scriptfolder] eq $projectroot} { + set vc "git" + set rev [punk::repo::git_revision $scriptfolder] + set rem [punk::repo::git_remote $scriptfolder] + } else { + set vc " - none found -" + set rev "n/a" + set remotes "n/a" + } + puts stdout "- version control : $vc" + puts stdout "- revision : $rev" + puts stdout "- remote : $rem" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + + exit 0 +} + +if {$::punkmake::command eq "shell"} { + package require punk + package require punk::repl + puts stderr "make shell not fully implemented - dropping into ordinary punk shell" + repl::start stdin + + exit 1 +} + +if {$::punkmake::command eq "vendorupdate"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + #puts "-- [tcl::tm::list] --" + puts stdout "Updating vendor modules in src folder" + + proc vendor_localupdate {projectroot} { + set local_modules [list] + set git_modules [list] + set fossil_modules [list] + set sourcefolder $projectroot/src + #todo vendor/lib + set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] + + set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] + lappend vendormodulefolders vendormodules + foreach vf $vendormodulefolders { + if {[file exists $sourcefolder/$vf]} { + lassign [split $vf _] _vm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + + set vendor_config $sourcefolder/vendormodules$which/include_modules.config + if {[file exists $vendor_config]} { + set targetroot $sourcefolder/vendormodules$which + source $vendor_config ;#populate $local_modules $git_modules $fossil_modules with project-specific list + if {![llength $local_modules]} { + puts stderr "src/vendormodules$which No local vendor modules configured for updating (config file: $vendor_config)" + } else { + if {[catch { + #---------- + set vendor_installer [punkcheck::installtrack new make.tcl $sourcefolder/vendormodules$which/.punkcheck] + $vendor_installer set_source_target $projectroot $sourcefolder/vendormodules$which + set installation_event [$vendor_installer start_event {-make_step vendorupdate}] + #---------- + } errM]} { + puts stderr "Unable to use punkcheck for vendormodules$which update. Error: $errM" + set installation_event "" + } + foreach {relpath module} $local_modules { + set module [string trim $module :] + set module_subpath [string map {:: /} [namespace qualifiers $module]] + set srclocation [file join $projectroot $relpath $module_subpath] + #puts stdout "$relpath $module $module_subpath $srclocation" + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 + if {![llength $pkgmatches]} { + puts stderr "Missing source for vendor module $module - not found in $srclocation" + continue + } + set latestfile [lindex $pkgmatches 0] + set latestver [lindex [split [file rootname $latestfile] -] 1] + foreach m $pkgmatches { + lassign [split [file rootname $m] -] _pkg ver + #puts "comparing $ver vs $latestver" + if {[package vcompare $ver $latestver] == 1} { + set latestver $ver + set latestfile $m + } + } + set srcfile [file join $srclocation $latestfile] + set tgtfile [file join $targetroot $module_subpath $latestfile] + if {$installation_event ne ""} { + #---------- + $installation_event targetset_init INSTALL $tgtfile + $installation_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$installation_event targetset_source_changes] changed]]\ + || [llength [$installation_event get_targets_exist]] < [llength [$installation_event get_targets]]\ + } { + file mkdir [file dirname $tgtfile] ;#ensure containing folder for target exists + $installation_event targetset_started + # -- --- --- --- --- --- + puts "VENDORMODULES$which update: $srcfile -> $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $installation_event targetset_end FAILED + } else { + $installation_event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts -nonewline stderr "." + $installation_event targetset_end SKIPPED + } + $installation_event end + } else { + file copy -force $srcfile $tgtfile + } + } + + } + } else { + puts stderr "No config at $vendor_config - nothing configured to update" + } + } + } + } + + vendor_localupdate $projectroot + + puts stdout " vendor package update done " + flush stderr + flush stdout + ::exit 0 +} + +if {$::punkmake::command eq "bootsupport"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + #puts "-- [tcl::tm::list] --" + puts stdout "Updating bootsupport from local files" + + proc bootsupport_localupdate {projectroot} { + set bootsupport_modules [list] ;#variable populated by include_modules.config file - review + + set bootmodulefolders [glob -nocomplain -dir $sourcefolder/bootsupport -type d -tails modules_tcl*] + lappend bootmodulefolder modules + foreach bm $bootmodulefolders { + if {[file exists $sourcefolder/bootsupport/$bm]} { + lassign [split $bm _] _bm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + set bootsupport_config $projectroot/src/bootsupport/modules$which/include_modules.config ;# + if {[file exists $bootsupport_config]} { + set targetroot $projectroot/src/bootsupport/modules$which + source $bootsupport_config ;#populate $bootsupport_modules with project-specific list + if {![llength $bootsupport_modules]} { + puts stderr "bootsupport/modules$which - No local bootsupport modules configured for updating" + } else { + + if {[catch { + #---------- + set boot_installer [punkcheck::installtrack new make.tcl $projectroot/src/bootsupport/.punkcheck] + $boot_installer set_source_target $projectroot $projectroot/src/bootsupport + set boot_event [$boot_installer start_event {-make_step bootsupport}] + #---------- + } errM]} { + puts stderr "Unable to use punkcheck for bootsupport error: $errM" + set boot_event "" + } + + foreach {relpath module} $bootsupport_modules { + set module [string trim $module :] + set module_subpath [string map [list :: /] [namespace qualifiers $module]] + set srclocation [file join $projectroot $relpath $module_subpath] + #puts stdout "$relpath $module $module_subpath $srclocation" + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 + if {![llength $pkgmatches]} { + puts stderr "Missing source for bootsupport module $module - not found in $srclocation" + continue + } + set latestfile [lindex $pkgmatches 0] + set latestver [lindex [split [file rootname $latestfile] -] 1] + foreach m $pkgmatches { + lassign [split [file rootname $m] -] _pkg ver + #puts "comparing $ver vs $latestver" + if {[package vcompare $ver $latestver] == 1} { + set latestver $ver + set latestfile $m + } + } + set srcfile [file join $srclocation $latestfile] + set tgtfile [file join $targetroot $module_subpath $latestfile] + if {$boot_event ne ""} { + #---------- + $boot_event targetset_init INSTALL $tgtfile + $boot_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$boot_event targetset_source_changes] changed]]\ + || [llength [$boot_event get_targets_exist]] < [llength [$boot_event get_targets]]\ + } { + file mkdir [file dirname $tgtfile] ;#ensure containing folder for target exists + $boot_event targetset_started + # -- --- --- --- --- --- + puts "BOOTSUPPORT module$which update: $srcfile -> $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $boot_event targetset_end FAILED + } else { + $boot_event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts -nonewline stderr "." + $boot_event targetset_end SKIPPED + } + $boot_event end + } else { + file copy -force $srcfile $tgtfile + } + } + if {$boot_event ne ""} { + puts \n + $boot_event destroy + $boot_installer destroy + } + } + + } + } + } + } + + bootsupport_localupdate $projectroot + + #if this project has custom project layouts, and there is a bootsupport folder - update their bootsupport + + set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + foreach project_layout_base $layout_bases { + if {[file exists $project_layout_base]} { + set project_layouts [glob -nocomplain -dir $project_layout_base -type d -tail *] + foreach layoutname $project_layouts { + #don't auto-create src/bootsupport - just update it if it exists + if {[file exists [file join $project_layout_base $layoutname/src/bootsupport]]} { + set antipaths [list\ + README.md\ + ] + set boot_module_folders [glob -nocomplain -dir $projectroot/src/bootsupport/modules_tcl*] + lappend bootsupport_module_folders "modules" + foreach bm $bootsupport_module_folders { + if {[file exists $projectroot/src/bootsupport/$bm]} { + lassign [split $bm _] _bm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + set sourcemodules $projectroot/src/bootsupport/modules$which + set targetroot [file join $project_layout_base $layoutname/src/bootsupport/modules$which] + file mkdir $targetroot + + puts stdout "BOOTSUPPORT$which layouts/$layoutname: copying from $sourcemodules to $targetroot (if source file changed)" + set resultdict [punkcheck::install $sourcemodules $targetroot -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + flush stdout + } + } + } + } + } else { + puts stderr "No layout base at $project_layout_base" + } + } + puts stdout " bootsupport done " + flush stderr + flush stdout + #punk86 can hang if calling make.tcl via 'run' without this 'after' delay. punk87 unaffected. cause unknown. + #after 500 + ::exit 0 +} + + + +if {$::punkmake::command ne "project"} { + puts stderr "Command $::punkmake::command not implemented - aborting." + flush stderr + after 100 + exit 1 +} + + + +#external libs and modules first - and any supporting files - no 'building' required + +#install src vendor contents (from version controlled src folder) to base of project (same target folders as our own src/modules etc ie to paths that go on the auto_path and in tcl::tm::list) + +set vendorlibfolders [glob -nocomplain -dir $sourcefolder -type d -tails vendorlib_tcl*] +lappend vendorlibfolders vendorlib + +foreach lf $vendorlibfolders { + if {[file exists $sourcefolder/$lf]} { + lassign [split $lf _] _vm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + set target_lib_folder $projectroot/lib$which + file mkdir $projectroot/lib$which + + #exclude README.md from source folder - but only the root one + #-antiglob_paths takes relative patterns e.g + # */test.txt will only match test.txt exactly one level deep. + # */*/*.foo will match any path ending in .foo that is exactly 2 levels deep. + # **/test.txt will match at any level below the root (but not in the root) + set antipaths [list\ + README.md\ + ] + + puts stdout "VENDORLIB$which: copying from $sourcefolder/$lf to $target_lib_folder (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/$lf $target_lib_folder -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + } +} +if {![llength $vendorlibfolders]} { + puts stderr "VENDORLIB: No src/vendorlib or src/vendorlib_tcl* folder found." +} + +set vendormodulefolders [glob -nocomplain -dir $sourcefolder -type d -tails vendormodules_tcl*] +lappend vendormodulefolders vendormodules + +foreach vf $vendormodulefolders { + if {[file exists $sourcefolder/$vf]} { + lassign [split $vf _] _vm tclx + if {$tclx ne ""} { + set which _$tclx + } else { + set which "" + } + set target_module_folder $projectroot/modules$which + file mkdir $target_module_folder + + #install .tm *and other files* + puts stdout "VENDORMODULES$which: copying from $sourcefolder/$vf to $target_module_folder (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/$vf $target_module_folder -installer make.tcl -overwrite installedsourcechanged-targets -antiglob_paths {README.md include_modules.config}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + } +} +if {![llength $vendormodulefolders]} { + puts stderr "VENDORMODULES: No src/vendormodules or src/vendormodules_tcl* folders found." +} + +######################################################## +#templates +#e.g The default project layout is mainly folder structure and readme files - but has some scripts developed under the main src that we want to sync +#src to src/modules/punk/mix/templates/layouts/project/src + +set old_layout_update_list [list\ + [list project $sourcefolder/modules/punk/mix/templates]\ + [list basic $sourcefolder/mixtemplates]\ + ] +set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + +foreach layoutbase $layout_bases { + if {![file exists $layoutbase]} { + continue + } + set project_layouts [glob -nocomplain -dir $layoutbase -type d -tail *] + foreach layoutname $project_layouts { + set config [dict create\ + -make-step sync_layouts\ + ] + #---------- + set tpl_installer [punkcheck::installtrack new make.tcl $layoutbase/.punkcheck] + $tpl_installer set_source_target $sourcefolder $layoutbase + set tpl_event [$tpl_installer start_event $config] + #---------- + set pairs [list] + set pairs [list\ + [list $sourcefolder/build.tcl $layoutbase/$layoutname/src/build.tcl]\ + [list $sourcefolder/make.tcl $layoutbase/$layoutname/src/make.tcl]\ + ] + + foreach filepair $pairs { + lassign $filepair srcfile tgtfile + + file mkdir [file dirname $tgtfile] + #---------- + $tpl_event targetset_init INSTALL $tgtfile + $tpl_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$tpl_event targetset_source_changes] changed]]\ + || [llength [$tpl_event get_targets_exist]] < [llength [$tpl_event get_targets]]\ + } { + $tpl_event targetset_started + # -- --- --- --- --- --- + puts stdout "PROJECT LAYOUT update - layoutname: $layoutname Copying from $srcfile to $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $tpl_event targetset_end FAILED -note "layout:$layoutname copy failed with err: $errM" + } else { + $tpl_event targetset_end OK -note "layout:$layoutname" + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $tpl_event targetset_end SKIPPED + } + } + + $tpl_event end + $tpl_event destroy + $tpl_installer destroy + } +} +######################################################## + +#consolidated /modules /modules_tclX folder used for target where X is tcl major version +#the make process will process for any _tclX not just the major version of the current interpreter + +#default source module folders are at projectroot/src/modules and projectroot/src/modules_tclX (where X is tcl major version) +#There may be multiple other src module folders at same level (e.g folder not being other special-purpose folder and not matching name vendor* that contains at least one .tm file in its root) +set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] +puts stdout "SOURCEMODULES: scanning [llength $source_module_folderlist] folders" +foreach src_module_dir $source_module_folderlist { + set mtail [file tail $src_module_dir] + if {[string match "modules_tcl*" $mtail]} { + set target_modules_base $projectroot/$mtail + } else { + set target_modules_base $projectroot/modules + } + file mkdir $target_modules_base + + puts stderr "Processing source module dir: $src_module_dir" + set dirtail [file tail $src_module_dir] + #modules and associated files belonging to this package/app + set copied [punk::mix::cli::lib::build_modules_from_source_to_base $src_module_dir $target_modules_base -glob *.tm] ;#will only accept a glob ending in .tm + #set copied [list] + puts stdout "--------------------------" + puts stderr "Copied [llength $copied] tm modules from src/$dirtail to $target_modules_base " + puts stdout "--------------------------" + + set overwrite "installedsourcechanged-targets" + #set overwrite "ALL-TARGETS" + puts stdout "MODULEFOLDER non_tm_files $src_module_dir - copying to $target_modules_base (if source file changed)" + set resultdict [punkcheck::install_non_tm_files $src_module_dir $target_modules_base -installer make.tcl -overwrite $overwrite -antiglob_paths {README.md}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] +} + +set installername "make.tcl" + +# ---------------------------------------- +if {[punk::repo::is_fossil_root $projectroot]} { + set config [dict create\ + -make-step configure_fossil\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $projectroot/.punkcheck] + $installer set_source_target $projectroot $projectroot + + set event [$installer start_event $config] + $event targetset_init VIRTUAL fossil_settings_mainmenu ;#VIRTUAL - since there is no actual target file + set menufile $projectroot/.fossil-custom/mainmenu + $event targetset_addsource $menufile + #---------- + + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Configuring fossil setting: mainmenu from: $menufile" + if {[catch { + set fd [open $menufile r] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + exec fossil settings mainmenu $data + } errM]} { + $event targetset_end FAILED -note "fossil update failed: $errM" + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + $event destroy + $installer destroy +} + +set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] +if {$buildfolder ne "$sourcefolder/_build"} { + puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" + puts stdout " -aborted- " + exit 2 +} + + +#find runtimes +set rtfolder $sourcefolder/runtime +set runtimes [glob -nocomplain -dir $rtfolder -types {f x} -tail *] +if {![llength $runtimes]} { + puts stderr "No executable runtimes found in $rtfolder - unable to build any .vfs folders into executables." + puts stderr "Add runtimes to $sourcefolder/runtime if required" + exit 0 +} + +if {[catch {exec sdx help} errM]} { + puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" + puts stderr "err: $errM" + exit 1 +} + +# -- --- --- --- --- --- --- --- --- --- +#load mapvfs.config file (if any) in runtime folder to map runtimes to vfs folders. +#build a dict keyed on runtime executable name. +#If no mapfile (or no mapfile entry for that runtime) - the runtime will be paired with a matching .vfs folder in src folder. e.g punk.exe to src/punk.vfs +#If vfs folders or runtime executables which are explicitly listed in the mapfile don't exist - warn on stderr - but continue. if such nonexistants found; prompt user for whether to continue or abort. +set mapfile $rtfolder/mapvfs.config +set runtime_vfs_map [dict create] +set vfs_runtime_map [dict create] +if {[file exists $mapfile]} { + set fdmap [open $mapfile r] + fconfigure $fdmap -translation binary + set mapdata [read $fdmap] + close $fdmap + set mapdata [string map [list \r\n \n] $mapdata] + set missing [list] + foreach ln [split $mapdata \n] { + set ln [string trim $ln] + if {$ln eq "" || [string match #* $ln]} { + continue + } + set vfspaths [lassign $ln runtime] + if {[string match *.exe $runtime]} { + #.exe is superfluous but allowed + #drop windows .exe suffix so same config can work cross platform - extension will be re-added if necessary later + set runtime [string range $runtime 0 end-4] + } + if {$runtime ne "-"} { + set runtime_test $runtime + if {"windows" eq $::tcl_platform(platform)} { + set runtime_test $runtime.exe + } + if {![file exists [file join $rtfolder $runtime_test]]} { + puts stderr "WARNING: Missing runtime file $rtfolder/$runtime_test (line in mapvfs.config: $ln)" + lappend missing $runtime + } + } + foreach vfs $vfspaths { + if {![file isdirectory [file join $sourcefolder $vfs]]} { + puts stderr "WARNNING: Missing vfs folder [file join $sourcefolder $vfs] specified in mapvfs.config for runtime $runtime" + lappend missing $vfs + } + dict lappend vfs_runtime_map $vfs $runtime + } + if {[dict exists $runtime_vfs_map $runtime]} { + puts stderr "CONFIG FILE ERROR. runtime: $runtime was specified more than once in $mapfile." + exit 3 + } + dict set runtime_vfs_map $runtime $vfspaths + } + if {[llength $missing]} { + puts stderr "WARNING [llength $missing] missing items from $mapfile. (TODO - prompt user to continue/abort)" + foreach m $missing { + puts stderr " $m" + } + puts stderr "continuing..." + } +} +# -- --- --- --- --- --- --- --- --- --- + +set vfs_folders [glob -nocomplain -dir $sourcefolder -types d -tail *.vfs] +#add any extra .vfs folders found in runtime/mapvfs.config file (e.g myotherruntimes/something.vfs) +dict for {vfs -} $vfs_runtime_map { + if {$vfs ni $vfs_folders} { + lappend vfs_folders $vfs + } +} +if {![llength $vfs_folders]} { + puts stdout "No .vfs folders found at '$sourcefolder' - no kits to build" + puts stdout " -done- " + exit 0 +} + +set vfs_folder_changes [dict create] ;#cache whether each .vfs folder has changes so we don't re-run tests if building from same .vfs with multiple runtime executables + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +#set runtimefile [lindex $runtimes 0] +foreach runtimefile $runtimes { + #runtimefile e.g tclkit86bi.exe on windows tclkit86bi on other platforms + + #sdx *may* be pointed to use the runtime we use to build the kit, or the user may manually use this runtime if they don't have tclsh + #sdx will complain if the same runtime is used for the shell as is used in the -runtime argument - so we make a copy (REVIEW) + #if {![file exists $buildfolder/buildruntime.exe]} { + # file copy $rtfolder/$runtimefile $buildfolder/buildruntime.exe + #} + + set basedir $buildfolder + set config [dict create\ + -make-step copy_runtime\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $installer set_source_target $rtfolder $buildfolder + set event [$installer start_event $config] + $event targetset_init INSTALL $buildfolder/build_$runtimefile + $event targetset_addsource $rtfolder/$runtimefile + #---------- + + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Copying runtime from $rtfolder/$runtimefile to $buildfolder/build_$runtimefile" + if {[catch { + file copy -force $rtfolder/$runtimefile $buildfolder/build_$runtimefile + } errM]} { + $event targetset_end FAILED + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + +} + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + +# +# loop over vfs_folders and for each one, loop over configured (or matching) runtimes - build with sdx if source .vfs or source runtime exe has changed. +# we are using punkcheck to install result to buildfolder so we create a .punkcheck file at the target folder to store metadata. +# punkcheck allows us to not rely purely on timestamps (which may be unreliable) +# +set startdir [pwd] +puts stdout "Found [llength $vfs_folders] .vfs folders - building executable for each..." +cd [file dirname $buildfolder] +#root folder mtime is insufficient for change detection. Tree mtime of folders only is a barely passable mechanism for vfs change detection in some circumstances - e.g if files added/removed but never edited in place +#a hash of full tree file & dir mtime may be more reasonable - but it remains to be seen if just tar & checksum is any/much slower. +#Simply rebuilding all the time may be close the speed of detecting change anyway - and almost certainly much faster when there is a change. +#Using first mtime encountered that is later than target is another option - but likely to be highly variable in speed. Last file in the tree could happen to be the latest, and this mechanism doesn't handle build on reversion to older source. +set exe_names_seen [list] +foreach vfs $vfs_folders { + + set vfsname [file rootname $vfs] + puts stdout " Processing vfs $sourcefolder/$vfs" + puts stdout " ------------------------------------" + set skipped_vfs_build 0 + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set basedir $buildfolder + set config [dict create\ + -make-step build_vfs\ + ] + + set runtimes [list] + if {[dict exists $vfs_runtime_map $vfs]} { + set runtimes [dict get $vfs_runtime_map $vfs] ;#map dict is unsuffixed (.exe stripped or was not present) + if {"windows" eq $::tcl_platform(platform)} { + set runtimes_raw $runtimes + set runtimes [list] + foreach rt $runtimes_raw { + if {![string match *.exe $rt] && $rt ne "-"} { + set rt $rt.exe + } + lappend runtimes $rt + } + } + } else { + #only match this vfs to a correspondingly named runtime if there was no explicit entry for that runtime + set matchrt [file rootname [file tail $vfs]] ;#e.g project.vfs -> project + if {![dict exists $runtime_vfs_map $matchrt]} { + if {"windows" eq $::tcl_platform(platform)} { + if {[file exists $rtfolder/$matchrt.exe]} { + lappend runtimes $matchrt.exe + } + } else { + lappend runtimes $matchrt + } + } + } + #assert $runtimes is a list of executable names suffixed with .exe if on windows - whether or not specified with .exe in the mapvfs.config + + + #todo - non kit based - zipkit? + # $runtimes may now include a dash entry "-" (from mapvfs.config file) + foreach rtname $runtimes { + #rtname of "-" indicates build a kit without a runtime + + #first configured runtime will be the one to use the same name as .vfs folder for output executable. Additional runtimes on this .vfs will need to suffix the runtime name to disambiguate. + #review: This mechanism may not be great for multiplatform builds ? We may be better off consistently combining vfsname and rtname and letting a later platform-specific step choose ones to install in bin with simpler names. + if {$rtname eq "-"} { + set targetkit $vfsname.kit + } else { + if {$::tcl_platform(platform) eq "windows"} { + set targetkit ${vfsname}.exe + } else { + set targetkit $vfsname + } + if {$targetkit in $exe_names_seen} { + #more than one runtime for this .vfs + set targetkit ${vfsname}_$rtname + } + } + lappend exe_names_seen $targetkit + # -- ---------- + set vfs_installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $vfs_installer set_source_target $sourcefolder $buildfolder + set vfs_event [$vfs_installer start_event {-make-step build_vfs}] + $vfs_event targetset_init INSTALL $buildfolder/$targetkit + $vfs_event targetset_addsource $sourcefolder/$vfs + if {$rtname ne "-"} { + $vfs_event targetset_addsource $buildfolder/build_$rtname + } + # -- ---------- + + set changed_unchanged [$vfs_event targetset_source_changes] + + if {[llength [dict get $changed_unchanged changed]] || [llength [$vfs_event get_targets_exist]] < [llength [$vfs_event get_targets]]} { + #source .vfs folder has changes + $vfs_event targetset_started + # -- --- --- --- --- --- + + #use + if {[file exists $buildfolder/$vfsname.new]} { + puts stderr "deleting existing $buildfolder/$vfsname.new" + file delete $buildfolder/$vfsname.new + } + + puts stdout "building $vfsname with sdx.. vfsdir:$vfs cwd: [pwd]" + + + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose + } + } result]} { + if {$rtname ne "-"} { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose failed with msg: $result" + } else { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose failed with msg: $result" + } + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } + + if {![file exists $buildfolder/$vfsname.new]} { + puts stderr "|err> make.tcl build didn't seem to produce output at $sourcefolder/_build/$vfsname.new" + $vfs_event targetset_end FAILED + exit 2 + } + + # -- --- --- + if {$::tcl_platform(platform) eq "windows"} { + set pscmd "tasklist" + } else { + set pscmd "ps" + } + + #killing process doesn't apply to .kit build + if {$rtname ne "-"} { + if {![catch { + exec $pscmd | grep $vfsname + } still_running]} { + + puts stdout "found $vfsname instances still running\n" + set count_killed 0 + foreach ln [split $still_running \n] { + puts stdout " $ln" + + if {$::tcl_platform(platform) eq "windows"} { + set pid [lindex $ln 1] + if {$forcekill} { + set killcmd [list taskkill /F /PID $pid] + } else { + set killcmd [list taskkill /PID $pid] + } + } else { + set pid [lindex $ln 0] + #review! + if {$forcekill} { + set killcmd [list kill -9 $pid] + } else { + set killcmd [list kill $pid] + } + } + puts stdout " pid: $pid (attempting to kill now using '$killcmd')" + if {[catch { + exec {*}$killcmd + } errMsg]} { + puts stderr "$killcmd returned an error:" + puts stderr $errMsg + if {!$forcekill} { + puts stderr "(try '[info script] -k' option to force kill)" + } + #avoid exiting if the kill failure was because the task has already exited + #review - *no running instance* works with windows taskkill - "*No such process*" works with kill -9 on FreeBSD and linux - other platforms? + if {![string match "*no running instance*" $errMsg] && ![string match "*No such process*" $errMsg]} { + exit 4 + } + } else { + puts stderr "$killcmd ran without error" + incr count_killed + } + } + if {$count_killed > 0} { + puts stderr "\nKilled $count_killed processes. Waiting a short time before attempting to delete executable" + after 1000 + } + } else { + puts stderr "Ok.. no running '$vfsname' processes found" + } + } + + if {[file exists $buildfolder/$targetkit]} { + puts stderr "deleting existing $buildfolder/$targetkit" + if {[catch { + file delete $buildfolder/$targetkit + } msg]} { + puts stderr "Failed to delete $buildfolder/$targetkit" + exit 4 + } + } + #WINDOWS filesystem 'tunneling' (file replacement within 15secs) could cause targetkit to copy ctime & shortname metadata from previous file! + #This is probably harmless - but worth being aware of. + file rename $buildfolder/$vfsname.new $buildfolder/$targetkit + # -- --- --- --- --- --- + $vfs_event targetset_end OK + + + after 200 + set deployment_folder [file dirname $sourcefolder]/bin + file mkdir $deployment_folder + + # -- ---------- + set bin_installer [punkcheck::installtrack new "make.tcl" $deployment_folder/.punkcheck] + $bin_installer set_source_target $buildfolder $deployment_folder + set bin_event [$bin_installer start_event {-make-step final_kit_install}] + $bin_event targetset_init INSTALL $deployment_folder/$targetkit + #todo - move final deployment step outside of the build vfs loop? (final deployment can fail and then isn't rerun even though _build and deployed versions differ, unless .vfs modified again) + #set last_completion [$bin_event targetset_last_complete] + + $bin_event targetset_addsource $buildfolder/$targetkit + $bin_event targetset_started + # -- ---------- + + + set delete_failed 0 + if {[file exists $deployment_folder/$targetkit]} { + puts stderr "deleting existing deployed at $deployment_folder/$targetkit" + if {[catch { + file delete $deployment_folder/$targetkit + } errMsg]} { + puts stderr "deletion of deployed version at $deployment_folder/$targetkit failed: $errMsg" + set delete_failed 1 + } + } + if {!$delete_failed} { + puts stdout "copying.." + puts stdout "$buildfolder/$targetkit" + puts stdout "to:" + puts stdout "$deployment_folder/$targetkit" + after 300 + file copy $buildfolder/$targetkit $deployment_folder/$targetkit + # -- ---------- + $bin_event targetset_end OK + # -- ---------- + } else { + $bin_event targetset_end FAILED -note "could not delete" + exit 5 + } + $bin_event destroy + $bin_installer destroy + + } else { + set skipped_vfs_build 1 + puts stderr "." + puts stdout "Skipping build for vfs $vfs with runtime $rtname - no change detected" + $vfs_event targetset_end SKIPPED + } + $vfs_event destroy + $vfs_installer destroy + } ;#end foreach rtname in runtimes + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +} +cd $startdir + +puts stdout "done" +exit 0 + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cksum-1.1.4.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cksum-1.1.4.tm new file mode 100644 index 00000000..0fb17981 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cksum-1.1.4.tm @@ -0,0 +1,200 @@ +# cksum.tcl - Copyright (C) 2002 Pat Thoyts +# +# Provides a Tcl only implementation of the unix cksum(1) command. This is +# similar to the sum(1) command but the algorithm is better defined and +# standardized across multiple platforms by POSIX 1003.2/D11.2 +# +# This command has been verified against the cksum command from the GNU +# textutils package version 2.0 +# +# ------------------------------------------------------------------------- +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# ------------------------------------------------------------------------- + +package require Tcl 8.5-; # tcl minimum version + +namespace eval ::crc { + namespace export cksum + + variable cksum_tbl [list 0x0 \ + 0x04C11DB7 0x09823B6E 0x0D4326D9 0x130476DC 0x17C56B6B \ + 0x1A864DB2 0x1E475005 0x2608EDB8 0x22C9F00F 0x2F8AD6D6 \ + 0x2B4BCB61 0x350C9B64 0x31CD86D3 0x3C8EA00A 0x384FBDBD \ + 0x4C11DB70 0x48D0C6C7 0x4593E01E 0x4152FDA9 0x5F15ADAC \ + 0x5BD4B01B 0x569796C2 0x52568B75 0x6A1936C8 0x6ED82B7F \ + 0x639B0DA6 0x675A1011 0x791D4014 0x7DDC5DA3 0x709F7B7A \ + 0x745E66CD 0x9823B6E0 0x9CE2AB57 0x91A18D8E 0x95609039 \ + 0x8B27C03C 0x8FE6DD8B 0x82A5FB52 0x8664E6E5 0xBE2B5B58 \ + 0xBAEA46EF 0xB7A96036 0xB3687D81 0xAD2F2D84 0xA9EE3033 \ + 0xA4AD16EA 0xA06C0B5D 0xD4326D90 0xD0F37027 0xDDB056FE \ + 0xD9714B49 0xC7361B4C 0xC3F706FB 0xCEB42022 0xCA753D95 \ + 0xF23A8028 0xF6FB9D9F 0xFBB8BB46 0xFF79A6F1 0xE13EF6F4 \ + 0xE5FFEB43 0xE8BCCD9A 0xEC7DD02D 0x34867077 0x30476DC0 \ + 0x3D044B19 0x39C556AE 0x278206AB 0x23431B1C 0x2E003DC5 \ + 0x2AC12072 0x128E9DCF 0x164F8078 0x1B0CA6A1 0x1FCDBB16 \ + 0x018AEB13 0x054BF6A4 0x0808D07D 0x0CC9CDCA 0x7897AB07 \ + 0x7C56B6B0 0x71159069 0x75D48DDE 0x6B93DDDB 0x6F52C06C \ + 0x6211E6B5 0x66D0FB02 0x5E9F46BF 0x5A5E5B08 0x571D7DD1 \ + 0x53DC6066 0x4D9B3063 0x495A2DD4 0x44190B0D 0x40D816BA \ + 0xACA5C697 0xA864DB20 0xA527FDF9 0xA1E6E04E 0xBFA1B04B \ + 0xBB60ADFC 0xB6238B25 0xB2E29692 0x8AAD2B2F 0x8E6C3698 \ + 0x832F1041 0x87EE0DF6 0x99A95DF3 0x9D684044 0x902B669D \ + 0x94EA7B2A 0xE0B41DE7 0xE4750050 0xE9362689 0xEDF73B3E \ + 0xF3B06B3B 0xF771768C 0xFA325055 0xFEF34DE2 0xC6BCF05F \ + 0xC27DEDE8 0xCF3ECB31 0xCBFFD686 0xD5B88683 0xD1799B34 \ + 0xDC3ABDED 0xD8FBA05A 0x690CE0EE 0x6DCDFD59 0x608EDB80 \ + 0x644FC637 0x7A089632 0x7EC98B85 0x738AAD5C 0x774BB0EB \ + 0x4F040D56 0x4BC510E1 0x46863638 0x42472B8F 0x5C007B8A \ + 0x58C1663D 0x558240E4 0x51435D53 0x251D3B9E 0x21DC2629 \ + 0x2C9F00F0 0x285E1D47 0x36194D42 0x32D850F5 0x3F9B762C \ + 0x3B5A6B9B 0x0315D626 0x07D4CB91 0x0A97ED48 0x0E56F0FF \ + 0x1011A0FA 0x14D0BD4D 0x19939B94 0x1D528623 0xF12F560E \ + 0xF5EE4BB9 0xF8AD6D60 0xFC6C70D7 0xE22B20D2 0xE6EA3D65 \ + 0xEBA91BBC 0xEF68060B 0xD727BBB6 0xD3E6A601 0xDEA580D8 \ + 0xDA649D6F 0xC423CD6A 0xC0E2D0DD 0xCDA1F604 0xC960EBB3 \ + 0xBD3E8D7E 0xB9FF90C9 0xB4BCB610 0xB07DABA7 0xAE3AFBA2 \ + 0xAAFBE615 0xA7B8C0CC 0xA379DD7B 0x9B3660C6 0x9FF77D71 \ + 0x92B45BA8 0x9675461F 0x8832161A 0x8CF30BAD 0x81B02D74 \ + 0x857130C3 0x5D8A9099 0x594B8D2E 0x5408ABF7 0x50C9B640 \ + 0x4E8EE645 0x4A4FFBF2 0x470CDD2B 0x43CDC09C 0x7B827D21 \ + 0x7F436096 0x7200464F 0x76C15BF8 0x68860BFD 0x6C47164A \ + 0x61043093 0x65C52D24 0x119B4BE9 0x155A565E 0x18197087 \ + 0x1CD86D30 0x029F3D35 0x065E2082 0x0B1D065B 0x0FDC1BEC \ + 0x3793A651 0x3352BBE6 0x3E119D3F 0x3AD08088 0x2497D08D \ + 0x2056CD3A 0x2D15EBE3 0x29D4F654 0xC5A92679 0xC1683BCE \ + 0xCC2B1D17 0xC8EA00A0 0xD6AD50A5 0xD26C4D12 0xDF2F6BCB \ + 0xDBEE767C 0xE3A1CBC1 0xE760D676 0xEA23F0AF 0xEEE2ED18 \ + 0xF0A5BD1D 0xF464A0AA 0xF9278673 0xFDE69BC4 0x89B8FD09 \ + 0x8D79E0BE 0x803AC667 0x84FBDBD0 0x9ABC8BD5 0x9E7D9662 \ + 0x933EB0BB 0x97FFAD0C 0xAFB010B1 0xAB710D06 0xA6322BDF \ + 0xA2F33668 0xBCB4666D 0xB8757BDA 0xB5365D03 0xB1F740B4 ] + + variable uid + if {![info exists uid]} {set uid 0} +} + +# crc::CksumInit -- +# +# Create and initialize a cksum context. This is cleaned up when we +# call CksumFinal to obtain the result. +# +proc ::crc::CksumInit {} { + variable uid + set token [namespace current]::[incr uid] + upvar #0 $token state + array set state {t 0 l 0} + return $token +} + +proc ::crc::CksumUpdate {token data} { + variable cksum_tbl + upvar #0 $token state + set t $state(t) + binary scan $data c* r + foreach {n} $r { + set index [expr { (($t >> 24) ^ ($n & 0xFF)) & 0xFF }] + # Since the introduction of built-in bigInt support with Tcl + # 8.5, bit-shifting $t to the left no longer overflows, + # keeping it 32 bits long. The value grows bigger and bigger + # instead - a severe hit on performance. For this reason we + # do a bitwise AND against 0xFFFFFFFF at each step to keep the + # value within limits. + set t [expr {0xFFFFFFFF & (($t << 8) ^ [lindex $cksum_tbl $index])}] + incr state(l) + } + set state(t) $t + return +} + +proc ::crc::CksumFinal {token} { + variable cksum_tbl + upvar #0 $token state + set t $state(t) + for {set i $state(l)} {$i > 0} {set i [expr {$i>>8}]} { + set index [expr {(($t >> 24) ^ $i) & 0xFF}] + set t [expr {0xFFFFFFFF & (($t << 8) ^ [lindex $cksum_tbl $index])}] + } + unset state + return [expr {~$t & 0xFFFFFFFF}] +} + +# crc::Pop -- +# +# Pop the nth element off a list. Used in options processing. +# +proc ::crc::Pop {varname {nth 0}} { + upvar $varname args + set r [lindex $args $nth] + set args [lreplace $args $nth $nth] + return $r +} + +# Description: +# Provide a Tcl equivalent of the unix cksum(1) command. +# Options: +# -filename name - return a checksum for the specified file. +# -format string - return the checksum using this format string. +# -chunksize size - set the chunking read size +# +proc ::crc::cksum {args} { + array set opts [list -filename {} -channel {} -chunksize 4096 \ + -format %u -command {}] + while {[string match -* [set option [lindex $args 0]]]} { + switch -glob -- $option { + -file* { set opts(-filename) [Pop args 1] } + -chan* { set opts(-channel) [Pop args 1] } + -chunk* { set opts(-chunksize) [Pop args 1] } + -for* { set opts(-format) [Pop args 1] } + -command { set opts(-command) [Pop args 1] } + default { + if {[llength $args] == 1} { break } + if {[string compare $option "--"] == 0} { Pop args ; break } + set err [join [lsort [array names opts -*]] ", "] + return -code error "bad option \"option\": must be $err" + } + } + Pop args + } + + if {$opts(-filename) != {}} { + set opts(-channel) [open $opts(-filename) r] + fconfigure $opts(-channel) -translation binary + } + + if {$opts(-channel) == {}} { + + if {[llength $args] != 1} { + return -code error "wrong # args: should be\ + cksum ?-format string?\ + -channel chan | -filename file | string" + } + set tok [CksumInit] + CksumUpdate $tok [lindex $args 0] + set r [CksumFinal $tok] + + } else { + + set tok [CksumInit] + while {![eof $opts(-channel)]} { + CksumUpdate $tok [read $opts(-channel) $opts(-chunksize)] + } + set r [CksumFinal $tok] + + if {$opts(-filename) != {}} { + close $opts(-channel) + } + } + + return [format $opts(-format) $r] +} + +# ------------------------------------------------------------------------- + +package provide cksum 1.1.4 + +# ------------------------------------------------------------------------- +# Local variables: +# mode: tcl +# indent-tabs-mode: nil +# End: diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cmdline-1.5.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cmdline-1.5.2.tm new file mode 100644 index 00000000..4e5e1df9 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/cmdline-1.5.2.tm @@ -0,0 +1,933 @@ +# cmdline.tcl -- +# +# This package provides a utility for parsing command line +# arguments that are processed by our various applications. +# It also includes a utility routine to determine the +# application name for use in command line errors. +# +# Copyright (c) 1998-2000 by Ajuba Solutions. +# Copyright (c) 2001-2015 by Andreas Kupries . +# Copyright (c) 2003 by David N. Welton +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. + +package require Tcl 8.5- +package provide cmdline 1.5.2 + +namespace eval ::cmdline { + namespace export getArgv0 getopt getKnownOpt getfiles getoptions \ + getKnownOptions usage +} + +# ::cmdline::getopt -- +# +# The cmdline::getopt works in a fashion like the standard +# C based getopt function. Given an option string and a +# pointer to an array or args this command will process the +# first argument and return info on how to proceed. +# +# Arguments: +# argvVar Name of the argv list that you +# want to process. If options are found the +# arg list is modified and the processed arguments +# are removed from the start of the list. +# optstring A list of command options that the application +# will accept. If the option ends in ".arg" the +# getopt routine will use the next argument as +# an argument to the option. Otherwise the option +# is a boolean that is set to 1 if present. +# optVar The variable pointed to by optVar +# contains the option that was found (without the +# leading '-' and without the .arg extension). +# valVar Upon success, the variable pointed to by valVar +# contains the value for the specified option. +# This value comes from the command line for .arg +# options, otherwise the value is 1. +# If getopt fails, the valVar is filled with an +# error message. +# +# Results: +# The getopt function returns 1 if an option was found, 0 if no more +# options were found, and -1 if an error occurred. + +proc ::cmdline::getopt {argvVar optstring optVar valVar} { + upvar 1 $argvVar argsList + upvar 1 $optVar option + upvar 1 $valVar value + + set result [getKnownOpt argsList $optstring option value] + + if {$result < 0} { + # Collapse unknown-option error into any-other-error result. + set result -1 + } + return $result +} + +# ::cmdline::getKnownOpt -- +# +# The cmdline::getKnownOpt works in a fashion like the standard +# C based getopt function. Given an option string and a +# pointer to an array or args this command will process the +# first argument and return info on how to proceed. +# +# Arguments: +# argvVar Name of the argv list that you +# want to process. If options are found the +# arg list is modified and the processed arguments +# are removed from the start of the list. Note that +# unknown options and the args that follow them are +# left in this list. +# optstring A list of command options that the application +# will accept. If the option ends in ".arg" the +# getopt routine will use the next argument as +# an argument to the option. Otherwise the option +# is a boolean that is set to 1 if present. +# optVar The variable pointed to by optVar +# contains the option that was found (without the +# leading '-' and without the .arg extension). +# valVar Upon success, the variable pointed to by valVar +# contains the value for the specified option. +# This value comes from the command line for .arg +# options, otherwise the value is 1. +# If getopt fails, the valVar is filled with an +# error message. +# +# Results: +# The getKnownOpt function returns 1 if an option was found, +# 0 if no more options were found, -1 if an unknown option was +# encountered, and -2 if any other error occurred. + +proc ::cmdline::getKnownOpt {argvVar optstring optVar valVar} { + upvar 1 $argvVar argsList + upvar 1 $optVar option + upvar 1 $valVar value + + # default settings for a normal return + set value "" + set option "" + set result 0 + + # check if we're past the end of the args list + if {[llength $argsList] != 0} { + + # if we got -- or an option that doesn't begin with -, return (skipping + # the --). otherwise process the option arg. + switch -glob -- [set arg [lindex $argsList 0]] { + "--" { + set argsList [lrange $argsList 1 end] + } + "--*" - + "-*" { + set option [string range $arg 1 end] + if {[string equal [string range $option 0 0] "-"]} { + set option [string range $arg 2 end] + } + + # support for format: [-]-option=value + set idx [string first "=" $option 1] + if {$idx != -1} { + set _val [string range $option [expr {$idx+1}] end] + set option [string range $option 0 [expr {$idx-1}]] + } + + if {[lsearch -exact $optstring $option] != -1} { + # Booleans are set to 1 when present + set value 1 + set result 1 + set argsList [lrange $argsList 1 end] + } elseif {[lsearch -exact $optstring "$option.arg"] != -1} { + set result 1 + set argsList [lrange $argsList 1 end] + + if {[info exists _val]} { + set value $_val + } elseif {[llength $argsList]} { + set value [lindex $argsList 0] + set argsList [lrange $argsList 1 end] + } else { + set value "Option \"$option\" requires an argument" + set result -2 + } + } else { + # Unknown option. + set value "Illegal option \"-$option\"" + set result -1 + } + } + default { + # Skip ahead + } + } + } + + return $result +} + +# ::cmdline::getoptions -- +# +# Process a set of command line options, filling in defaults +# for those not specified. This also generates an error message +# that lists the allowed flags if an incorrect flag is specified. +# +# Arguments: +# argvVar The name of the argument list, typically argv. +# We remove all known options and their args from it. +# In other words, after the call to this command the +# referenced variable contains only the non-options, +# and unknown options. +# optlist A list-of-lists where each element specifies an option +# in the form: +# (where flag takes no argument) +# flag comment +# +# (or where flag takes an argument) +# flag default comment +# +# If flag ends in ".arg" then the value is taken from the +# command line. Otherwise it is a boolean and appears in +# the result if present on the command line. If flag ends +# in ".secret", it will not be displayed in the usage. +# usage Text to include in the usage display. Defaults to +# "options:" +# +# Results +# Name value pairs suitable for using with array set. +# A modified `argvVar`. + +proc ::cmdline::getoptions {argvVar optlist {usage options:}} { + upvar 1 $argvVar argv + + set opts [GetOptionDefaults $optlist result] + + set argc [llength $argv] + while {[set err [getopt argv $opts opt arg]]} { + if {$err < 0} { + set result(?) "" + break + } + set result($opt) $arg + } + if {[info exist result(?)] || [info exists result(help)]} { + Error [usage $optlist $usage] USAGE + } + return [array get result] +} + +# ::cmdline::getKnownOptions -- +# +# Process a set of command line options, filling in defaults +# for those not specified. This ignores unknown flags, but generates +# an error message that lists the correct usage if a known option +# is used incorrectly. +# +# Arguments: +# argvVar The name of the argument list, typically argv. This +# We remove all known options and their args from it. +# In other words, after the call to this command the +# referenced variable contains only the non-options, +# and unknown options. +# optlist A list-of-lists where each element specifies an option +# in the form: +# flag default comment +# If flag ends in ".arg" then the value is taken from the +# command line. Otherwise it is a boolean and appears in +# the result if present on the command line. If flag ends +# in ".secret", it will not be displayed in the usage. +# usage Text to include in the usage display. Defaults to +# "options:" +# +# Results +# Name value pairs suitable for using with array set. +# A modified `argvVar`. + +proc ::cmdline::getKnownOptions {argvVar optlist {usage options:}} { + upvar 1 $argvVar argv + + set opts [GetOptionDefaults $optlist result] + + # As we encounter them, keep the unknown options and their + # arguments in this list. Before we return from this procedure, + # we'll prepend these args to the argList so that the application + # doesn't lose them. + + set unknownOptions [list] + + set argc [llength $argv] + while {[set err [getKnownOpt argv $opts opt arg]]} { + if {$err == -1} { + # Unknown option. + + # Skip over any non-option items that follow it. + # For now, add them to the list of unknownOptions. + lappend unknownOptions [lindex $argv 0] + set argv [lrange $argv 1 end] + while {([llength $argv] != 0) \ + && ![string match "-*" [lindex $argv 0]]} { + lappend unknownOptions [lindex $argv 0] + set argv [lrange $argv 1 end] + } + } elseif {$err == -2} { + set result(?) "" + break + } else { + set result($opt) $arg + } + } + + # Before returning, prepend the any unknown args back onto the + # argList so that the application doesn't lose them. + set argv [concat $unknownOptions $argv] + + if {[info exist result(?)] || [info exists result(help)]} { + Error [usage $optlist $usage] USAGE + } + return [array get result] +} + +# ::cmdline::GetOptionDefaults -- +# +# This internal procedure processes the option list (that was passed to +# the getopt or getKnownOpt procedure). The defaultArray gets an index +# for each option in the option list, the value of which is the option's +# default value. +# +# Arguments: +# optlist A list-of-lists where each element specifies an option +# in the form: +# flag default comment +# If flag ends in ".arg" then the value is taken from the +# command line. Otherwise it is a boolean and appears in +# the result if present on the command line. If flag ends +# in ".secret", it will not be displayed in the usage. +# defaultArrayVar The name of the array in which to put argument defaults. +# +# Results +# Name value pairs suitable for using with array set. + +proc ::cmdline::GetOptionDefaults {optlist defaultArrayVar} { + upvar 1 $defaultArrayVar result + + set opts {? help} + foreach opt $optlist { + set name [lindex $opt 0] + if {[regsub -- {\.secret$} $name {} name] == 1} { + # Need to hide this from the usage display and getopt + } + lappend opts $name + if {[regsub -- {\.arg$} $name {} name] == 1} { + + # Set defaults for those that take values. + + set default [lindex $opt 1] + set result($name) $default + } else { + # The default for booleans is false + set result($name) 0 + } + } + return $opts +} + +# ::cmdline::usage -- +# +# Generate an error message that lists the allowed flags. +# +# Arguments: +# optlist As for cmdline::getoptions +# usage Text to include in the usage display. Defaults to +# "options:" +# +# Results +# A formatted usage message + +proc ::cmdline::usage {optlist {usage {options:}}} { + set str "[getArgv0] $usage\n" + set longest 20 + set lines {} + foreach opt [concat $optlist \ + {{- "Forcibly stop option processing"} {help "Print this message"} {? "Print this message"}}] { + set name "-[lindex $opt 0]" + if {[regsub -- {\.secret$} $name {} name] == 1} { + # Hidden option + continue + } + if {[regsub -- {\.arg$} $name {} name] == 1} { + append name " value" + set desc "[lindex $opt 2] <[lindex $opt 1]>" + } else { + set desc "[lindex $opt 1]" + } + set n [string length $name] + if {$n > $longest} { set longest $n } + # max not available before 8.5 - set longest [expr {max($longest, )}] + lappend lines $name $desc + } + foreach {name desc} $lines { + append str "[string trimright [format " %-*s %s" $longest $name $desc]]\n" + } + + return $str +} + +# ::cmdline::getfiles -- +# +# Given a list of file arguments from the command line, compute +# the set of valid files. On windows, file globbing is performed +# on each argument. On Unix, only file existence is tested. If +# a file argument produces no valid files, a warning is optionally +# generated. +# +# This code also uses the full path for each file. If not +# given it prepends [pwd] to the filename. This ensures that +# these files will never conflict with files in our zip file. +# +# Arguments: +# patterns The file patterns specified by the user. +# quiet If this flag is set, no warnings will be generated. +# +# Results: +# Returns the list of files that match the input patterns. + +proc ::cmdline::getfiles {patterns quiet} { + set result {} + if {$::tcl_platform(platform) == "windows"} { + foreach pattern $patterns { + set pat [file join $pattern] + set files [glob -nocomplain -- $pat] + if {$files == {}} { + if {! $quiet} { + puts stdout "warning: no files match \"$pattern\"" + } + } else { + foreach file $files { + lappend result $file + } + } + } + } else { + set result $patterns + } + set files {} + foreach file $result { + # Make file an absolute path so that we will never conflict + # with files that might be contained in our zip file. + set fullPath [file join [pwd] $file] + + if {[file isfile $fullPath]} { + lappend files $fullPath + } elseif {! $quiet} { + puts stdout "warning: no files match \"$file\"" + } + } + return $files +} + +# ::cmdline::getArgv0 -- +# +# This command returns the "sanitized" version of argv0. It will strip +# off the leading path and remove the ".bin" extensions that our apps +# use because they must be wrapped by a shell script. +# +# Arguments: +# None. +# +# Results: +# The application name that can be used in error messages. + +proc ::cmdline::getArgv0 {} { + global argv0 + + set name [file tail $argv0] + return [file rootname $name] +} + +## +# ### ### ### ######### ######### ######### +## +# Now the typed versions of the above commands. +## +# ### ### ### ######### ######### ######### +## + +# typedCmdline.tcl -- +# +# This package provides a utility for parsing typed command +# line arguments that may be processed by various applications. +# +# Copyright (c) 2000 by Ross Palmer Mohn. +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: cmdline.tcl,v 1.28 2011/02/23 17:41:52 andreas_kupries Exp $ + +namespace eval ::cmdline { + namespace export typedGetopt typedGetoptions typedUsage + + # variable cmdline::charclasses -- + # + # Create regexp list of allowable character classes + # from "string is" error message. + # + # Results: + # String of character class names separated by "|" characters. + + variable charclasses + #checker exclude badKey + catch {string is . .} charclasses + variable dummy + regexp -- {must be (.+)$} $charclasses dummy charclasses + regsub -all -- {, (or )?} $charclasses {|} charclasses + unset dummy +} + +# ::cmdline::typedGetopt -- +# +# The cmdline::typedGetopt works in a fashion like the standard +# C based getopt function. Given an option string and a +# pointer to a list of args this command will process the +# first argument and return info on how to proceed. In addition, +# you may specify a type for the argument to each option. +# +# Arguments: +# argvVar Name of the argv list that you want to process. +# If options are found, the arg list is modified +# and the processed arguments are removed from the +# start of the list. +# +# optstring A list of command options that the application +# will accept. If the option ends in ".xxx", where +# xxx is any valid character class to the tcl +# command "string is", then typedGetopt routine will +# use the next argument as a typed argument to the +# option. The argument must match the specified +# character classes (e.g. integer, double, boolean, +# xdigit, etc.). Alternatively, you may specify +# ".arg" for an untyped argument. +# +# optVar Upon success, the variable pointed to by optVar +# contains the option that was found (without the +# leading '-' and without the .xxx extension). If +# typedGetopt fails the variable is set to the empty +# string. SOMETIMES! Different for each -value! +# +# argVar Upon success, the variable pointed to by argVar +# contains the argument for the specified option. +# If typedGetopt fails, the variable is filled with +# an error message. +# +# Argument type syntax: +# Option that takes no argument. +# foo +# +# Option that takes a typeless argument. +# foo.arg +# +# Option that takes a typed argument. Allowable types are all +# valid character classes to the tcl command "string is". +# Currently must be one of alnum, alpha, ascii, control, +# boolean, digit, double, false, graph, integer, lower, print, +# punct, space, true, upper, wordchar, or xdigit. +# foo.double +# +# Option that takes an argument from a list. +# foo.(bar|blat) +# +# Argument quantifier syntax: +# Option that takes an optional argument. +# foo.arg? +# +# Option that takes a list of arguments terminated by "--". +# foo.arg+ +# +# Option that takes an optional list of arguments terminated by "--". +# foo.arg* +# +# Argument quantifiers work on all argument types, so, for +# example, the following is a valid option specification. +# foo.(bar|blat|blah)? +# +# Argument syntax miscellany: +# Options may be specified on the command line using a unique, +# shortened version of the option name. Given that program foo +# has an option list of {bar.alpha blah.arg blat.double}, +# "foo -b fob" returns an error, but "foo -ba fob" +# successfully returns {bar fob} +# +# Results: +# The typedGetopt function returns one of the following: +# 1 a valid option was found +# 0 no more options found to process +# -1 invalid option +# -2 missing argument to a valid option +# -3 argument to a valid option does not match type +# +# Known Bugs: +# When using options which include special glob characters, +# you must use the exact option. Abbreviating it can cause +# an error in the "cmdline::prefixSearch" procedure. + +proc ::cmdline::typedGetopt {argvVar optstring optVar argVar} { + variable charclasses + + upvar $argvVar argsList + + upvar $optVar retvar + upvar $argVar optarg + + # default settings for a normal return + set optarg "" + set retvar "" + set retval 0 + + # check if we're past the end of the args list + if {[llength $argsList] != 0} { + + # if we got -- or an option that doesn't begin with -, return (skipping + # the --). otherwise process the option arg. + switch -glob -- [set arg [lindex $argsList 0]] { + "--" { + set argsList [lrange $argsList 1 end] + } + + "-*" { + # Create list of options without their argument extensions + + set optstr "" + foreach str $optstring { + lappend optstr [file rootname $str] + } + + set _opt [string range $arg 1 end] + + set i [prefixSearch $optstr [file rootname $_opt]] + if {$i != -1} { + set opt [lindex $optstring $i] + + set quantifier "none" + if {[regexp -- {\.[^.]+([?+*])$} $opt dummy quantifier]} { + set opt [string range $opt 0 end-1] + } + + if {[string first . $opt] == -1} { + set retval 1 + set retvar $opt + set argsList [lrange $argsList 1 end] + + } elseif {[regexp -- "\\.(arg|$charclasses)\$" $opt dummy charclass] + || [regexp -- {\.\(([^)]+)\)} $opt dummy charclass]} { + if {[string equal arg $charclass]} { + set type arg + } elseif {[regexp -- "^($charclasses)\$" $charclass]} { + set type class + } else { + set type oneof + } + + set argsList [lrange $argsList 1 end] + set opt [file rootname $opt] + + while {1} { + if {[llength $argsList] == 0 + || [string equal "--" [lindex $argsList 0]]} { + if {[string equal "--" [lindex $argsList 0]]} { + set argsList [lrange $argsList 1 end] + } + + set oneof "" + if {$type == "arg"} { + set charclass an + } elseif {$type == "oneof"} { + set oneof ", one of $charclass" + set charclass an + } + + if {$quantifier == "?"} { + set retval 1 + set retvar $opt + set optarg "" + } elseif {$quantifier == "+"} { + set retvar $opt + if {[llength $optarg] < 1} { + set retval -2 + set optarg "Option requires at least one $charclass argument$oneof -- $opt" + } else { + set retval 1 + } + } elseif {$quantifier == "*"} { + set retval 1 + set retvar $opt + } else { + set optarg "Option requires $charclass argument$oneof -- $opt" + set retvar $opt + set retval -2 + } + set quantifier "" + } elseif {($type == "arg") + || (($type == "oneof") + && [string first "|[lindex $argsList 0]|" "|$charclass|"] != -1) + || (($type == "class") + && [string is $charclass [lindex $argsList 0]])} { + set retval 1 + set retvar $opt + lappend optarg [lindex $argsList 0] + set argsList [lrange $argsList 1 end] + } else { + set oneof "" + if {$type == "arg"} { + set charclass an + } elseif {$type == "oneof"} { + set oneof ", one of $charclass" + set charclass an + } + set optarg "Option requires $charclass argument$oneof -- $opt" + set retvar $opt + set retval -3 + + if {$quantifier == "?"} { + set retval 1 + set optarg "" + } + set quantifier "" + } + if {![regexp -- {[+*]} $quantifier]} { + break; + } + } + } else { + Error \ + "Illegal option type specification: must be one of $charclasses" \ + BAD OPTION TYPE + } + } else { + set optarg "Illegal option -- $_opt" + set retvar $_opt + set retval -1 + } + } + default { + # Skip ahead + } + } + } + + return $retval +} + +# ::cmdline::typedGetoptions -- +# +# Process a set of command line options, filling in defaults +# for those not specified. This also generates an error message +# that lists the allowed options if an incorrect option is +# specified. +# +# Arguments: +# argvVar The name of the argument list, typically argv +# optlist A list-of-lists where each element specifies an option +# in the form: +# +# option default comment +# +# Options formatting is as described for the optstring +# argument of typedGetopt. Default is for optionally +# specifying a default value. Comment is for optionally +# specifying a comment for the usage display. The +# options "--", "-help", and "-?" are automatically included +# in optlist. +# +# Argument syntax miscellany: +# Options formatting and syntax is as described in typedGetopt. +# There are two additional suffixes that may be applied when +# passing options to typedGetoptions. +# +# You may add ".multi" as a suffix to any option. For options +# that take an argument, this means that the option may be used +# more than once on the command line and that each additional +# argument will be appended to a list, which is then returned +# to the application. +# foo.double.multi +# +# If a non-argument option is specified as ".multi", it is +# toggled on and off for each time it is used on the command +# line. +# foo.multi +# +# If an option specification does not contain the ".multi" +# suffix, it is not an error to use an option more than once. +# In this case, the behavior for options with arguments is that +# the last argument is the one that will be returned. For +# options that do not take arguments, using them more than once +# has no additional effect. +# +# Options may also be hidden from the usage display by +# appending the suffix ".secret" to any option specification. +# Please note that the ".secret" suffix must be the last suffix, +# after any argument type specification and ".multi" suffix. +# foo.xdigit.multi.secret +# +# Results +# Name value pairs suitable for using with array set. + +proc ::cmdline::typedGetoptions {argvVar optlist {usage options:}} { + variable charclasses + + upvar 1 $argvVar argv + + set opts {? help} + foreach opt $optlist { + set name [lindex $opt 0] + if {[regsub -- {\.secret$} $name {} name] == 1} { + # Remove this extension before passing to typedGetopt. + } + if {[regsub -- {\.multi$} $name {} name] == 1} { + # Remove this extension before passing to typedGetopt. + + regsub -- {\..*$} $name {} temp + set multi($temp) 1 + } + lappend opts $name + if {[regsub -- "\\.(arg|$charclasses|\\(.+).?\$" $name {} name] == 1} { + # Set defaults for those that take values. + # Booleans are set just by being present, or not + + set dflt [lindex $opt 1] + if {$dflt != {}} { + set defaults($name) $dflt + } + } + } + set argc [llength $argv] + while {[set err [typedGetopt argv $opts opt arg]]} { + if {$err == 1} { + if {[info exists result($opt)] + && [info exists multi($opt)]} { + # Toggle boolean options or append new arguments + + if {$arg == ""} { + unset result($opt) + } else { + set result($opt) "$result($opt) $arg" + } + } else { + set result($opt) "$arg" + } + } elseif {($err == -1) || ($err == -3)} { + Error [typedUsage $optlist $usage] USAGE + } elseif {$err == -2 && ![info exists defaults($opt)]} { + Error [typedUsage $optlist $usage] USAGE + } + } + if {[info exists result(?)] || [info exists result(help)]} { + Error [typedUsage $optlist $usage] USAGE + } + foreach {opt dflt} [array get defaults] { + if {![info exists result($opt)]} { + set result($opt) $dflt + } + } + return [array get result] +} + +# ::cmdline::typedUsage -- +# +# Generate an error message that lists the allowed flags, +# type of argument taken (if any), default value (if any), +# and an optional description. +# +# Arguments: +# optlist As for cmdline::typedGetoptions +# +# Results +# A formatted usage message + +proc ::cmdline::typedUsage {optlist {usage {options:}}} { + variable charclasses + + set str "[getArgv0] $usage\n" + set longest 20 + set lines {} + foreach opt [concat $optlist \ + {{help "Print this message"} {? "Print this message"}}] { + set name "-[lindex $opt 0]" + if {[regsub -- {\.secret$} $name {} name] == 1} { + # Hidden option + continue + } + + if {[regsub -- {\.multi$} $name {} name] == 1} { + # Display something about multiple options + } + + if {[regexp -- "\\.(arg|$charclasses)\$" $name dummy charclass] || + [regexp -- {\.\(([^)]+)\)} $opt dummy charclass] + } { + regsub -- "\\..+\$" $name {} name + append name " $charclass" + set desc [lindex $opt 2] + set default [lindex $opt 1] + if {$default != ""} { + append desc " <$default>" + } + } else { + set desc [lindex $opt 1] + } + lappend accum $name $desc + set n [string length $name] + if {$n > $longest} { set longest $n } + # max not available before 8.5 - set longest [expr {max($longest, [string length $name])}] + } + foreach {name desc} $accum { + append str "[string trimright [format " %-*s %s" $longest $name $desc]]\n" + } + return $str +} + +# ::cmdline::prefixSearch -- +# +# Search a Tcl list for a pattern; searches first for an exact match, +# and if that fails, for a unique prefix that matches the pattern +# (i.e, first "lsearch -exact", then "lsearch -glob $pattern*" +# +# Arguments: +# list list of words +# pattern word to search for +# +# Results: +# Index of found word is returned. If no exact match or +# unique short version is found then -1 is returned. + +proc ::cmdline::prefixSearch {list pattern} { + # Check for an exact match + + if {[set pos [::lsearch -exact $list $pattern]] > -1} { + return $pos + } + + # Check for a unique short version + + set slist [lsort $list] + if {[set pos [::lsearch -glob $slist $pattern*]] > -1} { + # What if there is nothing for the check variable? + + set check [lindex $slist [expr {$pos + 1}]] + if {[string first $pattern $check] != 0} { + return [::lsearch -exact $list [lindex $slist $pos]] + } + } + return -1 +} +# ::cmdline::Error -- +# +# Internal helper to throw errors with a proper error-code attached. +# +# Arguments: +# message text of the error message to throw. +# args additional parts of the error code to use, +# with CMDLINE as basic prefix added by this command. +# +# Results: +# An error is thrown, always. + +proc ::cmdline::Error {message args} { + return -code error -errorcode [linsert $args 0 CMDLINE] $message +} diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictutils-0.2.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictutils-0.2.1.tm new file mode 100644 index 00000000..12ca495b --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictutils-0.2.1.tm @@ -0,0 +1,145 @@ +# dictutils.tcl -- + # + # Various dictionary utilities. + # + # Copyright (c) 2007 Neil Madden (nem@cs.nott.ac.uk). + # + # License: http://www.cs.nott.ac.uk/~nem/license.terms (Tcl-style). + # + + #2023 0.2.1 - changed "package require Tcl 8.6" to "package require Tcl 8.6-" + + package require Tcl 8.6- + package provide dictutils 0.2.1 + + namespace eval dictutils { + namespace export equal apply capture witharray nlappend + namespace ensemble create + + # dictutils witharray dictVar arrayVar script -- + # + # Unpacks the elements of the dictionary in dictVar into the array + # variable arrayVar and then evaluates the script. If the script + # completes with an ok, return or continue status, then the result is copied + # back into the dictionary variable, otherwise it is discarded. A + # [break] can be used to explicitly abort the transaction. + # + proc witharray {dictVar arrayVar script} { + upvar 1 $dictVar dict $arrayVar array + array set array $dict + try { uplevel 1 $script + } on break {} { # Discard the result + } on continue result - on ok result { + set dict [array get array] ;# commit changes + return $result + } on return {result opts} { + set dict [array get array] ;# commit changes + dict incr opts -level ;# remove this proc from level + return -options $opts $result + } + # All other cases will discard the changes and propagage + } + + # dictutils equal equalp d1 d2 -- + # + # Compare two dictionaries for equality. Two dictionaries are equal + # if they (a) have the same keys, (b) the corresponding values for + # each key in the two dictionaries are equal when compared using the + # equality predicate, equalp (passed as an argument). The equality + # predicate is invoked with the key and the two values from each + # dictionary as arguments. + # + proc equal {equalp d1 d2} { + if {[dict size $d1] != [dict size $d2]} { return 0 } + dict for {k v} $d1 { + if {![dict exists $d2 $k]} { return 0 } + if {![invoke $equalp $k $v [dict get $d2 $k]]} { return 0 } + } + return 1 + } + + # apply dictVar lambdaExpr ?arg1 arg2 ...? -- + # + # A combination of *dict with* and *apply*, this procedure creates a + # new procedure scope populated with the values in the dictionary + # variable. It then applies the lambdaTerm (anonymous procedure) in + # this new scope. If the procedure completes normally, then any + # changes made to variables in the dictionary are reflected back to + # the dictionary variable, otherwise they are ignored. This provides + # a transaction-style semantics whereby atomic updates to a + # dictionary can be performed. This procedure can also be useful for + # implementing a variety of control constructs, such as mutable + # closures. + # + proc apply {dictVar lambdaExpr args} { + upvar 1 $dictVar dict + set env $dict ;# copy + lassign $lambdaExpr params body ns + if {$ns eq ""} { set ns "::" } + set body [format { + upvar 1 env __env__ + dict with __env__ %s + } [list $body]] + set lambdaExpr [list $params $body $ns] + set rc [catch { ::apply $lambdaExpr {*}$args } ret opts] + if {$rc == 0} { + # Copy back any updates + set dict $env + } + return -options $opts $ret + } + + # capture ?level? ?exclude? ?include? -- + # + # Captures a snapshot of the current (scalar) variable bindings at + # $level on the stack into a dictionary environment. This dictionary + # can later be used with *dictutils apply* to partially restore the + # scope, creating a first approximation of closures. The *level* + # argument should be of the forms accepted by *uplevel* and + # designates which level to capture. It defaults to 1 as in uplevel. + # The *exclude* argument specifies an optional list of literal + # variable names to avoid when performing the capture. No variables + # matching any item in this list will be captured. The *include* + # argument can be used to specify a list of glob patterns of + # variables to capture. Only variables matching one of these + # patterns are captured. The default is a single pattern "*", for + # capturing all visible variables (as determined by *info vars*). + # + proc capture {{level 1} {exclude {}} {include {*}}} { + if {[string is integer $level]} { incr level } + set env [dict create] + foreach pattern $include { + foreach name [uplevel $level [list info vars $pattern]] { + if {[lsearch -exact -index 0 $exclude $name] >= 0} { continue } + upvar $level $name value + catch { dict set env $name $value } ;# no arrays + } + } + return $env + } + + # nlappend dictVar keyList ?value ...? + # + # Append zero or more elements to the list value stored in the given + # dictionary at the path of keys specified in $keyList. If $keyList + # specifies a non-existent path of keys, nlappend will behave as if + # the path mapped to an empty list. + # + proc nlappend {dictvar keylist args} { + upvar 1 $dictvar dict + if {[info exists dict] && [dict exists $dict {*}$keylist]} { + set list [dict get $dict {*}$keylist] + } + lappend list {*}$args + dict set dict {*}$keylist $list + } + + # invoke cmd args... -- + # + # Helper procedure to invoke a callback command with arguments at + # the global scope. The helper ensures that proper quotation is + # used. The command is expected to be a list, e.g. {string equal}. + # + proc invoke {cmd args} { uplevel #0 $cmd $args } + + } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fileutil-1.16.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fileutil-1.16.1.tm new file mode 100644 index 00000000..6d5c737e --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/fileutil-1.16.1.tm @@ -0,0 +1,2311 @@ +# fileutil.tcl -- +# +# Tcl implementations of standard UNIX utilities. +# +# Copyright (c) 1998-2000 by Ajuba Solutions. +# Copyright (c) 2002 by Phil Ehrens (fileType) +# Copyright (c) 2005-2013 by Andreas Kupries +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. + +package require Tcl 8.5- +package require cmdline +package provide fileutil 1.16.1 + +namespace eval ::fileutil { + namespace export \ + grep find findByPattern cat touch foreachLine \ + jail stripPwd stripN stripPath tempdir tempfile \ + install fileType writeFile appendToFile \ + insertIntoFile removeFromFile replaceInFile \ + updateInPlace test tempdirReset maketempdir +} + +# ::fileutil::grep -- +# +# Implementation of grep. Adapted from the Tcler's Wiki. +# +# Arguments: +# pattern pattern to search for. +# files list of files to search; if NULL, uses stdin. +# +# Results: +# results list of matches + +proc ::fileutil::grep {pattern {files {}}} { + set result [list] + if {[llength $files] == 0} { + # read from stdin + set lnum 0 + while {[gets stdin line] >= 0} { + incr lnum + if {[regexp -- $pattern $line]} { + lappend result "${lnum}:${line}" + } + } + } else { + foreach filename $files { + set file [open $filename r] + set lnum 0 + while {[gets $file line] >= 0} { + incr lnum + if {[regexp -- $pattern $line]} { + lappend result "${filename}:${lnum}:${line}" + } + } + close $file + } + } + return $result +} + +# ::fileutil::find == + +# Below is the core command, which is portable across Tcl versions and +# platforms. Functionality which is common or platform and/or Tcl +# version dependent, has been factored out/ encapsulated into separate +# (small) commands. Only these commands may have multiple variant +# implementations per the available features of the Tcl core / +# platform. +# +# These commands are +# +# FADD - Add path result, performs filtering. Portable! +# GLOBF - Return files in a directory. Tcl version/platform dependent. +# GLOBD - Return dirs in a directory. Tcl version/platform dependent. +# ACCESS - Check directory for accessibility. Tcl version/platform dependent. + +proc ::fileutil::find {{basedir .} {filtercmd {}}} { + set result {} + set filt [string length $filtercmd] + + if {[file isfile $basedir]} { + # The base is a file, and therefore only possible result, + # modulo filtering. + + FADD $basedir + + } elseif {[file isdirectory $basedir]} { + # For a directory as base we do an iterative recursion through + # the directory hierarchy starting at the base. We use a queue + # (Tcl list) of directories we have to check. We access it by + # index, and stop when we have reached beyond the end of the + # list. This is faster than removing elements from the be- + # ginning of the list, as that entails copying down a possibly + # large list of directories, making it O(n*n). The index is + # faster, O(n), at the expense of memory. Nothing is deleted + # from the list until we have processed all directories in the + # hierarchy. + # + # We scan each directory at least twice. First for files, then + # for directories. The scans may internally make several + # passes (normal vs hidden files). + # + # Looped directory structures due to symbolic links are + # handled by _fully_ normalizing directory paths and checking + # if we encountered the normalized form before. The array + # 'known' is our cache where we record the known normalized + # paths. + + set pending [list $basedir] + set at 0 + array set parent {} + array set norm {} + Enter {} $basedir + + while {$at < [llength $pending]} { + # Get next directory not yet processed. + set current [lindex $pending $at] + incr at + + # Is the directory accessible? Continue if not. + ACCESS $current + + # Files first, then the sub-directories ... + + foreach f [GLOBF $current] { FADD $f } + + foreach f [GLOBD $current] { + # Ignore current and parent directory, this needs + # explicit filtering outside of the filter command. + if { + [string equal [file tail $f] "."] || + [string equal [file tail $f] ".."] + } continue + + # Extend result, modulo filtering. + FADD $f + + # Detection of symlink loops via a portable path + # normalization computing a canonical form of the path + # followed by a check if that canonical form was + # encountered before. If ok, record directory for + # expansion in future iterations. + + Enter $current $f + if {[Cycle $f]} continue + + lappend pending $f + } + } + } else { + return -code error "$basedir does not exist" + } + + return $result +} + +proc ::fileutil::Enter {parent path} { + upvar 1 parent _parent norm _norm + set _parent($path) $parent + set _norm($path) [fullnormalize $path] + return +} + +proc ::fileutil::Cycle {path} { + upvar 1 parent _parent norm _norm + set nform $_norm($path) + set paren $_parent($path) + while {$paren ne {}} { + if {$_norm($paren) eq $nform} { return yes } + set paren $_parent($paren) + } + return no +} + +# Helper command for fileutil::find. Performs the filtering of the +# result per a filter command for the candidates found by the +# traversal core, see above. This is portable. + +proc ::fileutil::FADD {filename} { + upvar 1 result result filt filt filtercmd filtercmd + if {!$filt} { + lappend result $filename + return + } + + set here [pwd] + cd [file dirname $filename] + + if {[uplevel 2 [linsert $filtercmd end [file tail $filename]]]} { + lappend result $filename + } + + cd $here + return +} + +# The next three helper commands for fileutil::find depend strongly on +# the version of Tcl, and partially on the platform. + +# 1. The -directory and -types switches were added to glob in Tcl +# 8.3. This means that we have to emulate them for Tcl 8.2. +# +# 2. In Tcl 8.3 using -types f will return only true files, but not +# links to files. This changed in 8.4+ where links to files are +# returned as well. So for 8.3 we have to handle the links +# separately (-types l) and also filter on our own. +# Note that Windows file links are hard links which are reported by +# -types f, but not -types l, so we can optimize that for the two +# platforms. +# +# Note further that we have to handle broken links on our own. They +# are not returned by glob yet we want them in the output. +# +# 3. In Tcl 8.3 we also have a crashing bug in glob (SIGABRT, "stat on +# a known file") when trying to perform 'glob -types {hidden f}' on +# a directory without e'x'ecute permissions. We code around by +# testing if we can cd into the directory (stat might return enough +# information too (mode), but possibly also not portable). +# +# For Tcl 8.2 and 8.4+ glob simply delivers an empty result +# (-nocomplain), without crashing. For them this command is defined +# so that the bytecode compiler removes it from the bytecode. +# +# This bug made the ACCESS helper necessary. +# We code around the problem by testing if we can cd into the +# directory (stat might return enough information too (mode), but +# possibly also not portable). + +if {[package vsatisfies [package present Tcl] 8.5]} { + # Tcl 8.5+. + # We have to check readability of "current" on our own, glob + # changed to error out instead of returning nothing. + + proc ::fileutil::ACCESS {args} {} + + proc ::fileutil::GLOBF {current} { + if {![file readable $current] || + [BadLink $current]} { + return {} + } + + set res [lsort -unique [concat \ + [glob -nocomplain -directory $current -types f -- *] \ + [glob -nocomplain -directory $current -types {hidden f} -- *]]] + + # Look for broken links (They are reported as neither file nor directory). + foreach l [lsort -unique [concat \ + [glob -nocomplain -directory $current -types l -- *] \ + [glob -nocomplain -directory $current -types {hidden l} -- *]]] { + if {[file isfile $l]} continue + if {[file isdirectory $l]} continue + lappend res $l + } + return [lsort -unique $res] + } + + proc ::fileutil::GLOBD {current} { + if {![file readable $current] || + [BadLink $current]} { + return {} + } + + lsort -unique [concat \ + [glob -nocomplain -directory $current -types d -- *] \ + [glob -nocomplain -directory $current -types {hidden d} -- *]] + } + + proc ::fileutil::BadLink {current} { + if {[file type $current] ne "link"} { return no } + + set dst [file join [file dirname $current] [file readlink $current]] + + if {![file exists $dst] || + ![file readable $dst]} { + return yes + } + + return no + } +} elseif {[package vsatisfies [package present Tcl] 8.4]} { + # Tcl 8.4+. + # (Ad 1) We have -directory, and -types, + # (Ad 2) Links are returned for -types f/d if they refer to files/dirs. + # (Ad 3) No bug to code around + + proc ::fileutil::ACCESS {args} {} + + proc ::fileutil::GLOBF {current} { + set res [lsort -unique [concat \ + [glob -nocomplain -directory $current -types f -- *] \ + [glob -nocomplain -directory $current -types {hidden f} -- *]]] + + # Look for broken links (They are reported as neither file nor directory). + foreach l [lsort -unique [concat \ + [glob -nocomplain -directory $current -types l -- *] \ + [glob -nocomplain -directory $current -types {hidden l} -- *]]] { + if {[file isfile $l]} continue + if {[file isdirectory $l]} continue + lappend res $l + } + return [lsort -unique $res] + } + + proc ::fileutil::GLOBD {current} { + lsort -unique [concat \ + [glob -nocomplain -directory $current -types d -- *] \ + [glob -nocomplain -directory $current -types {hidden d} -- *]] + } + +} elseif {[package vsatisfies [package present Tcl] 8.3]} { + # 8.3. + # (Ad 1) We have -directory, and -types, + # (Ad 2) Links are NOT returned for -types f/d, collect separately. + # No symbolic file links on Windows. + # (Ad 3) Bug to code around. + + proc ::fileutil::ACCESS {current} { + if {[catch { + set h [pwd] ; cd $current ; cd $h + }]} {return -code continue} + return + } + + if {[string equal $::tcl_platform(platform) windows]} { + proc ::fileutil::GLOBF {current} { + concat \ + [glob -nocomplain -directory $current -types f -- *] \ + [glob -nocomplain -directory $current -types {hidden f} -- *]] + } + } else { + proc ::fileutil::GLOBF {current} { + set l [concat \ + [glob -nocomplain -directory $current -types f -- *] \ + [glob -nocomplain -directory $current -types {hidden f} -- *]] + + foreach x [concat \ + [glob -nocomplain -directory $current -types l -- *] \ + [glob -nocomplain -directory $current -types {hidden l} -- *]] { + if {[file isdirectory $x]} continue + # We have now accepted files, links to files, and broken links. + lappend l $x + } + + return $l + } + } + + proc ::fileutil::GLOBD {current} { + set l [concat \ + [glob -nocomplain -directory $current -types d -- *] \ + [glob -nocomplain -directory $current -types {hidden d} -- *]] + + foreach x [concat \ + [glob -nocomplain -directory $current -types l -- *] \ + [glob -nocomplain -directory $current -types {hidden l} -- *]] { + if {![file isdirectory $x]} continue + lappend l $x + } + + return $l + } +} else { + # 8.2. + # (Ad 1,2,3) We do not have -directory, nor -types. Full emulation required. + + proc ::fileutil::ACCESS {args} {} + + if {[string equal $::tcl_platform(platform) windows]} { + # Hidden files cannot be handled by Tcl 8.2 in glob. We have + # to punt. + + proc ::fileutil::GLOBF {current} { + set current \\[join [split $current {}] \\] + set res {} + foreach x [glob -nocomplain -- [file join $current *]] { + if {[file isdirectory $x]} continue + if {[catch {file type $x}]} continue + # We have now accepted files, links to files, and + # broken links. We may also have accepted a directory + # as well, if the current path was inaccessible. This + # however will cause 'file type' to throw an error, + # hence the second check. + lappend res $x + } + return $res + } + + proc ::fileutil::GLOBD {current} { + set current \\[join [split $current {}] \\] + set res {} + foreach x [glob -nocomplain -- [file join $current *]] { + if {![file isdirectory $x]} continue + lappend res $x + } + return $res + } + } else { + # Hidden files on Unix are dot-files. We emulate the switch + # '-types hidden' by using an explicit pattern. + + proc ::fileutil::GLOBF {current} { + set current \\[join [split $current {}] \\] + set res {} + foreach x [glob -nocomplain -- [file join $current *] [file join $current .*]] { + if {[file isdirectory $x]} continue + if {[catch {file type $x}]} continue + # We have now accepted files, links to files, and + # broken links. We may also have accepted a directory + # as well, if the current path was inaccessible. This + # however will cause 'file type' to throw an error, + # hence the second check. + + lappend res $x + } + return $res + } + + proc ::fileutil::GLOBD {current} { + set current \\[join [split $current {}] \\] + set res {} + foreach x [glob -nocomplain -- $current/* [file join $current .*]] { + if {![file isdirectory $x]} continue + lappend res $x + } + return $res + } + } +} + +# ::fileutil::findByPattern -- +# +# Specialization of find. Finds files based on their names, +# which have to match the specified patterns. Options are used +# to specify which type of patterns (regexp-, glob-style) is +# used. +# +# Arguments: +# basedir Directory to start searching from. +# args Options (-glob, -regexp, --) followed by a +# list of patterns to search for. +# +# Results: +# files a list of interesting files. + +proc ::fileutil::findByPattern {basedir args} { + set pos 0 + set cmd ::fileutil::FindGlob + foreach a $args { + incr pos + switch -glob -- $a { + -- {break} + -regexp {set cmd ::fileutil::FindRegexp} + -glob {set cmd ::fileutil::FindGlob} + -* {return -code error "Unknown option $a"} + default {incr pos -1 ; break} + } + } + + set args [lrange $args $pos end] + + if {[llength $args] != 1} { + set pname [lindex [info level 0] 0] + return -code error \ + "wrong#args for \"$pname\", should be\ + \"$pname basedir ?-regexp|-glob? ?--? patterns\"" + } + + set patterns [lindex $args 0] + return [find $basedir [list $cmd $patterns]] +} + + +# ::fileutil::FindRegexp -- +# +# Internal helper. Filter command used by 'findByPattern' +# to match files based on regular expressions. +# +# Arguments: +# patterns List of regular expressions to match against. +# filename Name of the file to match against the patterns. +# Results: +# interesting A boolean flag. Set to true if the file +# matches at least one of the patterns. + +proc ::fileutil::FindRegexp {patterns filename} { + foreach p $patterns { + if {[regexp -- $p $filename]} { + return 1 + } + } + return 0 +} + +# ::fileutil::FindGlob -- +# +# Internal helper. Filter command used by 'findByPattern' +# to match files based on glob expressions. +# +# Arguments: +# patterns List of glob expressions to match against. +# filename Name of the file to match against the patterns. +# Results: +# interesting A boolean flag. Set to true if the file +# matches at least one of the patterns. + +proc ::fileutil::FindGlob {patterns filename} { + foreach p $patterns { + if {[string match $p $filename]} { + return 1 + } + } + return 0 +} + +# ::fileutil::stripPwd -- +# +# If the specified path references is a path in [pwd] (or [pwd] itself) it +# is made relative to [pwd]. Otherwise it is left unchanged. +# In the case of [pwd] itself the result is the string '.'. +# +# Arguments: +# path path to modify +# +# Results: +# path The (possibly) modified path. + +proc ::fileutil::stripPwd {path} { + + # [file split] is used to generate a canonical form for both + # paths, for easy comparison, and also one which is easy to modify + # using list commands. + + set pwd [pwd] + if {[string equal $pwd $path]} { + return "." + } + + set pwd [file split $pwd] + set npath [file split $path] + + if {[string match ${pwd}* $npath]} { + set path [eval [linsert [lrange $npath [llength $pwd] end] 0 file join ]] + } + return $path +} + +# ::fileutil::stripN -- +# +# Removes N elements from the beginning of the path. +# +# Arguments: +# path path to modify +# n number of elements to strip +# +# Results: +# path The modified path + +proc ::fileutil::stripN {path n} { + set path [file split $path] + if {$n >= [llength $path]} { + return {} + } else { + return [eval [linsert [lrange $path $n end] 0 file join]] + } +} + +# ::fileutil::stripPath -- +# +# If the specified path references/is a path in prefix (or prefix itself) it +# is made relative to prefix. Otherwise it is left unchanged. +# In the case of it being prefix itself the result is the string '.'. +# +# Arguments: +# prefix prefix to strip from the path. +# path path to modify +# +# Results: +# path The (possibly) modified path. + +if {[string equal $tcl_platform(platform) windows]} { + + # Windows. While paths are stored with letter-case preserved al + # comparisons have to be done case-insensitive. For reference see + # SF Tcllib Bug 2499641. + + proc ::fileutil::stripPath {prefix path} { + # [file split] is used to generate a canonical form for both + # paths, for easy comparison, and also one which is easy to modify + # using list commands. + + set prefix [file split $prefix] + set npath [file split $path] + + if {[string equal -nocase $prefix $npath]} { + return "." + } + + if {[string match -nocase "${prefix} *" $npath]} { + set path [eval [linsert [lrange $npath [llength $prefix] end] 0 file join ]] + } + return $path + } +} else { + proc ::fileutil::stripPath {prefix path} { + # [file split] is used to generate a canonical form for both + # paths, for easy comparison, and also one which is easy to modify + # using list commands. + + set prefix [file split $prefix] + set npath [file split $path] + + if {[string equal $prefix $npath]} { + return "." + } + + if {[string match "${prefix} *" $npath]} { + set path [eval [linsert [lrange $npath [llength $prefix] end] 0 file join ]] + } + return $path + } +} + +# ::fileutil::jail -- +# +# Ensures that the input path 'filename' stays within the +# directory 'jail'. In this way it prevents user-supplied paths +# from escaping the jail. +# +# Arguments: +# jail The path to the directory the other must +# not escape from. +# filename The path to prevent from escaping. +# +# Results: +# path The (possibly) modified path surely within +# the confines of the jail. + +proc fileutil::jail {jail filename} { + if {![string equal [file pathtype $filename] "relative"]} { + # Although the path to check is absolute (or volumerelative on + # windows) we cannot perform a simple prefix check to see if + # the path is inside the jail or not. We have to normalize + # both path and jail and then we can check. If the path is + # outside we make the original path relative and prefix it + # with the original jail. We do make the jail pseudo-absolute + # by prefixing it with the current working directory for that. + + # Normalized jail. Fully resolved sym links, if any. Our main + # complication is that normalize does not resolve symlinks in the + # last component of the path given to it, so we add a bogus + # component, resolve, and then strip it off again. That is why the + # code is so large and long. + + set njail [eval [list file join] [lrange [file split \ + [Normalize [file join $jail __dummy__]]] 0 end-1]] + + # Normalize filename. Fully resolved sym links, if + # any. S.a. for an explanation of the complication. + + set nfile [eval [list file join] [lrange [file split \ + [Normalize [file join $filename __dummy__]]] 0 end-1]] + + if {[string match ${njail}* $nfile]} { + return $filename + } + + # Outside the jail, put it inside. ... We normalize the input + # path lexically for this, to prevent escapes still lurking in + # the original path. (We cannot use the normalized path, + # symlinks may have bent it out of shape in unrecognizable ways. + + return [eval [linsert [lrange [file split \ + [lexnormalize $filename]] 1 end] 0 file join [pwd] $jail]] + } else { + # The path is relative, consider it as outside + # implicitly. Normalize it lexically! to prevent escapes, then + # put the jail in front, use PWD to ensure absoluteness. + + return [eval [linsert [file split [lexnormalize $filename]] 0 \ + file join [pwd] $jail]] + } +} + + +# ::fileutil::test -- +# +# Simple API to testing various properties of +# a path (read, write, file/dir, existence) +# +# Arguments: +# path path to test +# codes names of the properties to test +# msgvar Name of variable to leave an error +# message in. Optional. +# label Label for error message, optional +# +# Results: +# ok boolean flag, set if the path passes +# all tests. + +namespace eval ::fileutil { + variable test + array set test { + read {readable {Read access is denied}} + write {writable {Write access is denied}} + exec {executable {Is not executable}} + exists {exists {Does not exist}} + file {isfile {Is not a file}} + dir {isdirectory {Is not a directory}} + } +} + +proc ::fileutil::test {path codes {msgvar {}} {label {}}} { + variable test + + if {[string equal $msgvar ""]} { + set msg "" + } else { + upvar 1 $msgvar msg + } + + if {![string equal $label ""]} {append label { }} + + if {![regexp {^(read|write|exec|exists|file|dir)} $codes]} { + # Translate single characters into proper codes + set codes [string map { + r read w write e exists x exec f file d dir + } [split $codes {}]] + } + + foreach c $codes { + foreach {cmd text} $test($c) break + if {![file $cmd $path]} { + set msg "$label\"$path\": $text" + return 0 + } + } + + return 1 +} + +# ::fileutil::cat -- +# +# Tcl implementation of the UNIX "cat" command. Returns the contents +# of the specified files. +# +# Arguments: +# args names of the files to read, interspersed with options +# to set encodings, translations, or eofchar. +# +# Results: +# data data read from the file. + +proc ::fileutil::cat {args} { + # Syntax: (?options? file)+ + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + if {![llength $args]} { + # Argument processing stopped with arguments missing. + return -code error \ + "wrong#args: should be\ + [lindex [info level 0] 0] ?-eofchar|-translation|-encoding arg?+ file ..." + } + + # We go through the arguments using foreach and keeping track of + # the index we are at. We do not shift the arguments out to the + # left. That is inherently quadratic, copying everything down. + + set opts {} + set mode maybeopt + set channels {} + + foreach a $args { + if {[string equal $mode optarg]} { + lappend opts $a + set mode maybeopt + continue + } elseif {[string equal $mode maybeopt]} { + if {[string match -* $a]} { + switch -exact -- $a { + -encoding - + -translation - + -eofchar { + lappend opts $a + set mode optarg + continue + } + -- { + set mode file + continue + } + default { + return -code error \ + "Bad option \"$a\",\ + expected one of\ + -encoding, -eofchar,\ + or -translation" + } + } + } + # Not an option, but a file. Change mode and fall through. + set mode file + } + # Process file arguments + + if {[string equal $a -]} { + # Stdin reference is special. + + # Test that the current options are all ok. + # For stdin we have to avoid closing it. + + set old [fconfigure stdin] + set fail [catch { + SetOptions stdin $opts + } msg] ; # {} + SetOptions stdin $old + + if {$fail} { + return -code error $msg + } + + lappend channels [list $a $opts 0] + } else { + if {![file exists $a]} { + return -code error "Cannot read file \"$a\", does not exist" + } elseif {![file isfile $a]} { + return -code error "Cannot read file \"$a\", is not a file" + } elseif {![file readable $a]} { + return -code error "Cannot read file \"$a\", read access is denied" + } + + # Test that the current options are all ok. + set c [open $a r] + set fail [catch { + SetOptions $c $opts + } msg] ; # {} + close $c + if {$fail} { + return -code error $msg + } + + lappend channels [list $a $opts [file size $a]] + } + + # We may have more options and files coming after. + set mode maybeopt + } + + if {![string equal $mode maybeopt]} { + # Argument processing stopped with arguments missing. + return -code error \ + "wrong#args: should be\ + [lindex [info level 0] 0] ?-eofchar|-translation|-encoding arg?+ file ..." + } + + set data "" + foreach c $channels { + foreach {fname opts size} $c break + + if {[string equal $fname -]} { + set old [fconfigure stdin] + SetOptions stdin $opts + append data [read stdin] + SetOptions stdin $old + continue + } + + set c [open $fname r] + SetOptions $c $opts + + if {$size > 0} { + # Used the [file size] command to get the size, which + # preallocates memory, rather than trying to grow it as + # the read progresses. + append data [read $c $size] + } else { + # if the file has zero bytes it is either empty, or + # something where [file size] reports 0 but the file + # actually has data (like the files in the /proc + # filesystem on Linux). + append data [read $c] + } + close $c + } + + return $data +} + +# ::fileutil::writeFile -- +# +# Write the specified data into the named file, +# creating it if necessary. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to write. +# data The data to write into the file +# +# Results: +# None. + +proc ::fileutil::writeFile {args} { + # Syntax: ?options? file data + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec Writable $args opts fname data + + # Now perform the requested operation. + + file mkdir [file dirname $fname] + set c [open $fname w] + SetOptions $c $opts + puts -nonewline $c $data + close $c + return +} + +# ::fileutil::appendToFile -- +# +# Append the specified data at the end of the named file, +# creating it if necessary. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to extend. +# data The data to extend the file with. +# +# Results: +# None. + +proc ::fileutil::appendToFile {args} { + # Syntax: ?options? file data + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec Writable $args opts fname data + + # Now perform the requested operation. + + file mkdir [file dirname $fname] + set c [open $fname a] + SetOptions $c $opts + set at [tell $c] + puts -nonewline $c $data + close $c + return $at +} + +# ::fileutil::insertIntoFile -- +# +# Insert the specified data into the named file, +# creating it if necessary, at the given locaton. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to extend. +# data The data to extend the file with. +# +# Results: +# None. + +proc ::fileutil::insertIntoFile {args} { + + # Syntax: ?options? file at data + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec ReadWritable $args opts fname at data + + set max [file size $fname] + CheckLocation $at $max insertion + + if {[string length $data] == 0} { + # Another degenerate case, inserting nothing. + # Leave the file well enough alone. + return + } + + foreach {c o t} [Open2 $fname $opts] break + + # The degenerate cases of both appending and insertion at the + # beginning of the file allow more optimized implementations of + # the operation. + + if {$at == 0} { + puts -nonewline $o $data + fcopy $c $o + } elseif {$at == $max} { + fcopy $c $o + puts -nonewline $o $data + } else { + fcopy $c $o -size $at + puts -nonewline $o $data + fcopy $c $o + } + + Close2 $fname $t $c $o + return +} + +# ::fileutil::removeFromFile -- +# +# Remove n characters from the named file, +# starting at the given locaton. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to extend. +# at Location to start the removal from. +# n Number of characters to remove. +# +# Results: +# None. + +proc ::fileutil::removeFromFile {args} { + + # Syntax: ?options? file at n + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec ReadWritable $args opts fname at n + + set max [file size $fname] + CheckLocation $at $max removal + CheckLength $n $at $max removal + + if {$n == 0} { + # Another degenerate case, removing nothing. + # Leave the file well enough alone. + return + } + + foreach {c o t} [Open2 $fname $opts] break + + # The degenerate cases of both removal from the beginning or end + # of the file allow more optimized implementations of the + # operation. + + if {$at == 0} { + seek $c $n current + fcopy $c $o + } elseif {($at + $n) == $max} { + fcopy $c $o -size $at + # Nothing further to copy. + } else { + fcopy $c $o -size $at + seek $c $n current + fcopy $c $o + } + + Close2 $fname $t $c $o + return +} + +# ::fileutil::replaceInFile -- +# +# Remove n characters from the named file, +# starting at the given locaton, and replace +# it with the given data. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to extend. +# at Location to start the removal from. +# n Number of characters to remove. +# data The replacement data. +# +# Results: +# None. + +proc ::fileutil::replaceInFile {args} { + + # Syntax: ?options? file at n data + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec ReadWritable $args opts fname at n data + + set max [file size $fname] + CheckLocation $at $max replacement + CheckLength $n $at $max replacement + + if { + ($n == 0) && + ([string length $data] == 0) + } { + # Another degenerate case, replacing nothing with + # nothing. Leave the file well enough alone. + return + } + + foreach {c o t} [Open2 $fname $opts] break + + # Check for degenerate cases and handle them separately, + # i.e. strip the no-op parts out of the general implementation. + + if {$at == 0} { + if {$n == 0} { + # Insertion instead of replacement. + + puts -nonewline $o $data + fcopy $c $o + + } elseif {[string length $data] == 0} { + # Removal instead of replacement. + + seek $c $n current + fcopy $c $o + + } else { + # General replacement at front. + + seek $c $n current + puts -nonewline $o $data + fcopy $c $o + } + } elseif {($at + $n) == $max} { + if {$n == 0} { + # Appending instead of replacement + + fcopy $c $o + puts -nonewline $o $data + + } elseif {[string length $data] == 0} { + # Truncating instead of replacement + + fcopy $c $o -size $at + # Nothing further to copy. + + } else { + # General replacement at end + + fcopy $c $o -size $at + puts -nonewline $o $data + } + } else { + if {$n == 0} { + # General insertion. + + fcopy $c $o -size $at + puts -nonewline $o $data + fcopy $c $o + + } elseif {[string length $data] == 0} { + # General removal. + + fcopy $c $o -size $at + seek $c $n current + fcopy $c $o + + } else { + # General replacement. + + fcopy $c $o -size $at + seek $c $n current + puts -nonewline $o $data + fcopy $c $o + } + } + + Close2 $fname $t $c $o + return +} + +# ::fileutil::updateInPlace -- +# +# Run command prefix on the contents of the +# file and replace them with the result of +# the command. +# +# Arguments: +# options... Options and arguments. +# filename Path to the file to extend. +# cmd Command prefix to run. +# +# Results: +# None. + +proc ::fileutil::updateInPlace {args} { + # Syntax: ?options? file cmd + # options = -encoding ENC + # | -translation TRA + # | -eofchar ECH + # | -- + + Spec ReadWritable $args opts fname cmd + + # readFile/cat inlined ... + + set c [open $fname r] + SetOptions $c $opts + set data [read $c] + close $c + + # Transformation. Abort and do not modify the target file if an + # error was raised during this step. + + lappend cmd $data + set code [catch {uplevel 1 $cmd} res] + if {$code} { + return -code $code $res + } + + # writeFile inlined, with careful preservation of old contents + # until we are sure that the write was ok. + + if {[catch { + file rename -force $fname ${fname}.bak + + set o [open $fname w] + SetOptions $o $opts + puts -nonewline $o $res + close $o + + file delete -force ${fname}.bak + } msg]} { + if {[file exists ${fname}.bak]} { + catch { + file rename -force ${fname}.bak $fname + } + return -code error $msg + } + } + return +} + +proc ::fileutil::Writable {fname mv} { + upvar 1 $mv msg + if {[file exists $fname]} { + if {![file isfile $fname]} { + set msg "Cannot use file \"$fname\", is not a file" + return 0 + } elseif {![file writable $fname]} { + set msg "Cannot use file \"$fname\", write access is denied" + return 0 + } + } + return 1 +} + +proc ::fileutil::ReadWritable {fname mv} { + upvar 1 $mv msg + if {![file exists $fname]} { + set msg "Cannot use file \"$fname\", does not exist" + return 0 + } elseif {![file isfile $fname]} { + set msg "Cannot use file \"$fname\", is not a file" + return 0 + } elseif {![file writable $fname]} { + set msg "Cannot use file \"$fname\", write access is denied" + return 0 + } elseif {![file readable $fname]} { + set msg "Cannot use file \"$fname\", read access is denied" + return 0 + } + return 1 +} + +proc ::fileutil::Spec {check alist ov fv args} { + upvar 1 $ov opts $fv fname + + set n [llength $args] ; # Num more args + incr n ; # Count path as well + + set opts {} + set mode maybeopt + + set at 0 + foreach a $alist { + if {[string equal $mode optarg]} { + lappend opts $a + set mode maybeopt + incr at + continue + } elseif {[string equal $mode maybeopt]} { + if {[string match -* $a]} { + switch -exact -- $a { + -encoding - + -translation - + -eofchar { + lappend opts $a + set mode optarg + incr at + continue + } + -- { + # Stop processing. + incr at + break + } + default { + return -code error \ + "Bad option \"$a\",\ + expected one of\ + -encoding, -eofchar,\ + or -translation" + } + } + } + # Not an option, but a file. + # Stop processing. + break + } + } + + if {([llength $alist] - $at) != $n} { + # Argument processing stopped with arguments missing, or too + # many + return -code error \ + "wrong#args: should be\ + [lindex [info level 1] 0] ?-eofchar|-translation|-encoding arg? file $args" + } + + set fname [lindex $alist $at] + incr at + foreach \ + var $args \ + val [lrange $alist $at end] { + upvar 1 $var A + set A $val + } + + # Check given path ... + + if {![eval [linsert $check end $a msg]]} { + return -code error $msg + } + + return +} + +proc ::fileutil::Open2 {fname opts} { + set c [open $fname r] + set t [tempfile] + set o [open $t w] + + SetOptions $c $opts + SetOptions $o $opts + + return [list $c $o $t] +} + +proc ::fileutil::Close2 {f temp in out} { + close $in + close $out + + file copy -force $f ${f}.bak + file rename -force $temp $f + file delete -force ${f}.bak + return +} + +proc ::fileutil::SetOptions {c opts} { + if {![llength $opts]} return + eval [linsert $opts 0 fconfigure $c] + return +} + +proc ::fileutil::CheckLocation {at max label} { + if {![string is integer -strict $at]} { + return -code error \ + "Expected integer but got \"$at\"" + } elseif {$at < 0} { + return -code error \ + "Bad $label point $at, before start of data" + } elseif {$at > $max} { + return -code error \ + "Bad $label point $at, behind end of data" + } +} + +proc ::fileutil::CheckLength {n at max label} { + if {![string is integer -strict $n]} { + return -code error \ + "Expected integer but got \"$n\"" + } elseif {$n < 0} { + return -code error \ + "Bad $label size $n" + } elseif {($at + $n) > $max} { + return -code error \ + "Bad $label size $n, going behind end of data" + } +} + +# ::fileutil::foreachLine -- +# +# Executes a script for every line in a file. +# +# Arguments: +# var name of the variable to contain the lines +# filename name of the file to read. +# cmd The script to execute. +# +# Results: +# None. + +proc ::fileutil::foreachLine {var filename cmd} { + upvar 1 $var line + set fp [open $filename r] + + # -future- Use try/eval from tcllib/control + catch { + set code 0 + set result {} + set return 0 + while {[gets $fp line] >= 0} { + set code [catch {uplevel 1 $cmd} result options] + if {$code == 2} { + set return 1 + set code [dict get $options -code] + break + } elseif {$code != 0 && $code != 4} { + break + } + } + } + close $fp + + if {$return || $code == 1 || $code > 4} { + return -options $options $result + } + return $result +} + +# ::fileutil::touch -- +# +# Tcl implementation of the UNIX "touch" command. +# +# touch [-a] [-m] [-c] [-r ref_file] [-t time] filename ... +# +# Arguments: +# -a change the access time only, unless -m also specified +# -m change the modification time only, unless -a also specified +# -c silently prevent creating a file if it did not previously exist +# -r ref_file use the ref_file's time instead of the current time +# -t time use the specified time instead of the current time +# ("time" is an integer clock value, like [clock seconds]) +# filename ... the files to modify +# +# Results +# None. +# +# Errors: +# Both of "-r" and "-t" cannot be specified. + +if {[package vsatisfies [package provide Tcl] 8.3]} { + namespace eval ::fileutil { + namespace export touch + } + + proc ::fileutil::touch {args} { + # Don't bother catching errors, just let them propagate up + + set options { + {a "set the atime only"} + {m "set the mtime only"} + {c "do not create non-existant files"} + {r.arg "" "use time from ref_file"} + {t.arg -1 "use specified time"} + } + set usage ": [lindex [info level 0] 0]\ + \[options] filename ...\noptions:" + array set params [::cmdline::getoptions args $options $usage] + + # process -a and -m options + set set_atime [set set_mtime "true"] + if { $params(a) && ! $params(m)} {set set_mtime "false"} + if {! $params(a) && $params(m)} {set set_atime "false"} + + # process -r and -t + set has_t [expr {$params(t) != -1}] + set has_r [expr {[string length $params(r)] > 0}] + if {$has_t && $has_r} { + return -code error "Cannot specify both -r and -t" + } elseif {$has_t} { + set atime [set mtime $params(t)] + } elseif {$has_r} { + file stat $params(r) stat + set atime $stat(atime) + set mtime $stat(mtime) + } else { + set atime [set mtime [clock seconds]] + } + + # do it + foreach filename $args { + if {! [file exists $filename]} { + if {$params(c)} {continue} + close [open $filename w] + } + if {$set_atime} {file atime $filename $atime} + if {$set_mtime} {file mtime $filename $mtime} + } + return + } +} + +# ::fileutil::fileType -- +# +# Do some simple heuristics to determine file type. +# +# +# Arguments: +# filename Name of the file to test. +# +# Results +# type Type of the file. May be a list if multiple tests +# are positive (eg, a file could be both a directory +# and a link). In general, the list proceeds from most +# general (eg, binary) to most specific (eg, gif), so +# the full type for a GIF file would be +# "binary graphic gif" +# +# At present, the following types can be detected: +# +# directory +# empty +# binary +# text +# script +# executable [elf, dos, ne, pe] +# binary graphic [gif, jpeg, png, tiff, bitmap, icns] +# ps, eps, pdf +# html +# xml +# message pgp +# compressed [bzip, gzip, zip, tar] +# audio [mpeg, wave] +# gravity_wave_data_frame +# link +# doctools, doctoc, and docidx documentation files. +# + +proc ::fileutil::fileType {filename} { + ;## existence test + if { ! [ file exists $filename ] } { + set err "file not found: '$filename'" + return -code error $err + } + ;## directory test + if { [ file isdirectory $filename ] } { + set type directory + if { ! [ catch {file readlink $filename} ] } { + lappend type link + } + return $type + } + ;## empty file test + if { ! [ file size $filename ] } { + set type empty + if { ! [ catch {file readlink $filename} ] } { + lappend type link + } + return $type + } + set bin_rx {[\x00-\x08\x0b\x0e-\x1f]} + + if { [ catch { + set fid [ open $filename r ] + fconfigure $fid -translation binary + fconfigure $fid -buffersize 1024 + fconfigure $fid -buffering full + set test [ read $fid 1024 ] + ::close $fid + } err ] } { + catch { ::close $fid } + return -code error "::fileutil::fileType: $err" + } + + if { [ regexp $bin_rx $test ] } { + set type binary + set binary 1 + } else { + set type text + set binary 0 + } + + # SF Tcllib bug [795585]. Allowing whitespace between #! + # and path of script interpreter + + set metakit 0 + + if { [ regexp {^\#\!\s*(\S+)} $test -> terp ] } { + lappend type script $terp + } elseif {([regexp "\\\[manpage_begin " $test] && + !([regexp -- {--- !doctools ---} $test] || [regexp -- "!tcl\.tk//DSL doctools//EN//" $test])) || + ([regexp -- {--- doctools ---} $test] || [regexp -- "tcl\.tk//DSL doctools//EN//" $test])} { + lappend type doctools + } elseif {([regexp "\\\[toc_begin " $test] && + !([regexp -- {--- !doctoc ---} $test] || [regexp -- "!tcl\.tk//DSL doctoc//EN//" $test])) || + ([regexp -- {--- doctoc ---} $test] || [regexp -- "tcl\.tk//DSL doctoc//EN//" $test])} { + lappend type doctoc + } elseif {([regexp "\\\[index_begin " $test] && + !([regexp -- {--- !docidx ---} $test] || [regexp -- "!tcl\.tk//DSL docidx//EN//" $test])) || + ([regexp -- {--- docidx ---} $test] || [regexp -- "tcl\.tk//DSL docidx//EN//" $test])} { + lappend type docidx + } elseif {[regexp -- "tcl\\.tk//DSL diagram//EN//" $test]} { + lappend type tkdiagram + } elseif { $binary && [ regexp {^[\x7F]ELF} $test ] } { + lappend type executable elf + } elseif { $binary && [string match "MZ*" $test] } { + if { [scan [string index $test 24] %c] < 64 } { + lappend type executable dos + } else { + binary scan [string range $test 60 61] s next + set sig [string range $test $next [expr {$next + 1}]] + if { $sig == "NE" || $sig == "PE" } { + lappend type executable [string tolower $sig] + } else { + lappend type executable dos + } + } + } elseif { $binary && [string match "SQLite format 3\x00*" $test] } { + lappend type sqlite3 + + # Check for various sqlite-based application file formats. + set appid [string range $test 68 71] + if {$appid eq "\x0f\x05\x51\x12"} { + lappend type fossil-checkout + } elseif {$appid eq "\x0f\x05\x51\x13"} { + lappend type fossil-global-config + } elseif {$appid eq "\x0f\x05\x51\x11"} { + lappend type fossil-repository + } else { + # encode the appid as hex and append that. + binary scan $appid H8 aid + lappend type A$aid + } + + } elseif { $binary && [string match "BZh91AY\&SY*" $test] } { + lappend type compressed bzip + } elseif { $binary && [string match "\x1f\x8b*" $test] } { + lappend type compressed gzip + } elseif { $binary && [string range $test 257 262] == "ustar\x00" } { + lappend type compressed tar + } elseif { $binary && [string match "\x50\x4b\x03\x04*" $test] } { + lappend type compressed zip + } elseif { $binary && [string match "GIF*" $test] } { + lappend type graphic gif + } elseif { $binary && [string match "icns*" $test] } { + lappend type graphic icns bigendian + } elseif { $binary && [string match "snci*" $test] } { + lappend type graphic icns smallendian + } elseif { $binary && [string match "\x89PNG*" $test] } { + lappend type graphic png + } elseif { $binary && [string match "\xFF\xD8\xFF*" $test] } { + binary scan $test x3H2x2a5 marker txt + if { $marker == "e0" && $txt == "JFIF\x00" } { + lappend type graphic jpeg jfif + } elseif { $marker == "e1" && $txt == "Exif\x00" } { + lappend type graphic jpeg exif + } + } elseif { $binary && [string match "MM\x00\**" $test] } { + lappend type graphic tiff + } elseif { $binary && [string match "BM*" $test] && [string range $test 6 9] == "\x00\x00\x00\x00" } { + lappend type graphic bitmap + } elseif { ! $binary && [string match -nocase "*\*" $test] } { + lappend type html + } elseif {[string match "\%PDF\-*" $test] } { + lappend type pdf + } elseif { [string match "\%\!PS\-*" $test] } { + lappend type ps + if { [string match "* EPSF\-*" $test] } { + lappend type eps + } + } elseif { [string match -nocase "*\<\?xml*" $test] } { + lappend type xml + if { [ regexp -nocase {\<\!DOCTYPE\s+(\S+)} $test -> doctype ] } { + lappend type $doctype + } + } elseif { [string match {*BEGIN PGP MESSAGE*} $test] } { + lappend type message pgp + } elseif { $binary && [string match {IGWD*} $test] } { + lappend type gravity_wave_data_frame + } elseif {[string match "JL\x1a\x00*" $test] && ([file size $filename] >= 27)} { + lappend type metakit smallendian + set metakit 1 + } elseif {[string match "LJ\x1a\x00*" $test] && ([file size $filename] >= 27)} { + lappend type metakit bigendian + set metakit 1 + } elseif { $binary && [string match "RIFF*" $test] && [string range $test 8 11] == "WAVE" } { + lappend type audio wave + } elseif { $binary && [string match "ID3*" $test] } { + lappend type audio mpeg + } elseif { $binary && [binary scan $test S tmp] && [expr {$tmp & 0xFFE0}] == 65504 } { + lappend type audio mpeg + } + + # Additional checks of file contents at the end of the file, + # possibly pointing into the middle too (attached metakit, + # attached zip). + + ## Metakit File format: http://www.equi4.com/metakit/metakit-ff.html + ## Metakit database attached ? ## + + if {!$metakit && ([file size $filename] >= 27)} { + # The offsets in the footer are in always bigendian format + + if { [ catch { + set fid [ open $filename r ] + fconfigure $fid -translation binary + fconfigure $fid -buffersize 1024 + fconfigure $fid -buffering full + seek $fid -16 end + set test [ read $fid 16 ] + ::close $fid + } err ] } { + catch { ::close $fid } + return -code error "::fileutil::fileType: $err" + } + + binary scan $test IIII __ hdroffset __ __ + set hdroffset [expr {[file size $filename] - 16 - $hdroffset}] + + # Further checks iff the offset is actually inside the file. + + if {($hdroffset >= 0) && ($hdroffset < [file size $filename])} { + # Seek to the specified location and try to match a metakit header + # at this location. + + if { [ catch { + set fid [ open $filename r ] + fconfigure $fid -translation binary + fconfigure $fid -buffersize 1024 + fconfigure $fid -buffering full + seek $fid $hdroffset start + set test [ read $fid 16 ] + ::close $fid + } err ] } { + catch { ::close $fid } + return -code error "::fileutil::fileType: $err" + } + + if {[string match "JL\x1a\x00*" $test]} { + lappend type attached metakit smallendian + set metakit 1 + } elseif {[string match "LJ\x1a\x00*" $test]} { + lappend type attached metakit bigendian + set metakit 1 + } + } + } + + ## Zip File Format: http://zziplib.sourceforge.net/zzip-parse.html + ## http://www.pkware.com/products/enterprise/white_papers/appnote.html + + + ;## lastly, is it a link? + if { ! [ catch {file readlink $filename} ] } { + lappend type link + } + return $type +} + +# ::fileutil::tempdir -- +# +# Return the correct directory to use for temporary files. +# Python attempts this sequence, which seems logical: +# +# 1. The directory named by the `TMPDIR' environment variable. +# +# 2. The directory named by the `TEMP' environment variable. +# +# 3. The directory named by the `TMP' environment variable. +# +# 4. A platform-specific location: +# * On Macintosh, the `Temporary Items' folder. +# +# * On Windows, the directories `C:\\TEMP', `C:\\TMP', +# `\\TEMP', and `\\TMP', in that order. +# +# * On all other platforms, the directories `/tmp', +# `/var/tmp', and `/usr/tmp', in that order. +# +# 5. As a last resort, the current working directory. +# +# The code here also does +# +# 0. The directory set by invoking tempdir with an argument. +# If this is present it is used exclusively. +# +# Arguments: +# None. +# +# Side Effects: +# None. +# +# Results: +# The directory for temporary files. + +proc ::fileutil::tempdir {args} { + if {[llength $args] > 1} { + return -code error {wrong#args: should be "::fileutil::tempdir ?path?"} + } elseif {[llength $args] == 1} { + variable tempdir [lindex $args 0] + variable tempdirSet 1 + return + } + return [Normalize [TempDir]] +} + +proc ::fileutil::tempdirReset {} { + variable tempdir {} + variable tempdirSet 0 + return +} + +proc ::fileutil::TempDir {} { + global tcl_platform env + variable tempdir + variable tempdirSet + + set attempdirs [list] + set problems {} + + if {$tempdirSet} { + lappend attempdirs $tempdir + lappend problems {User/Application specified tempdir} + } else { + foreach tmp {TMPDIR TEMP TMP} { + if { [info exists env($tmp)] } { + lappend attempdirs $env($tmp) + } else { + lappend problems "No environment variable $tmp" + } + } + + switch $tcl_platform(platform) { + windows { + lappend attempdirs "C:\\TEMP" "C:\\TMP" "\\TEMP" "\\TMP" + } + macintosh { + lappend attempdirs $env(TRASH_FOLDER) ;# a better place? + } + default { + lappend attempdirs \ + [file join / tmp] \ + [file join / var tmp] \ + [file join / usr tmp] + } + } + + lappend attempdirs [pwd] + } + + foreach tmp $attempdirs { + if { [file isdirectory $tmp] && [file writable $tmp] } { + return $tmp + } elseif { ![file isdirectory $tmp] } { + lappend problems "Not a directory: $tmp" + } else { + lappend problems "Not writable: $tmp" + } + } + + # Fail if nothing worked. + return -code error "Unable to determine a proper directory for temporary files\n[join $problems \n]" +} + +namespace eval ::fileutil { + variable tempdir {} + variable tempdirSet 0 +} + +# ::fileutil::maketempdir -- + +proc ::fileutil::maketempdir {args} { + return [Normalize [MakeTempDir $args]] +} + +proc ::fileutil::MakeTempDir {config} { + # Setup of default configuration. + array set options {} + set options(-suffix) "" + set options(-prefix) "tmp" + set options(-dir) [tempdir] + + # TODO: Check for and reject options not in -suffix, -prefix, -dir + # Merge user configuration, overwrite defaults. + array set options $config + + # See also "tempfile" below. Could be shareable internal configuration. + set chars abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + set nrand_chars 10 + set maxtries 10 + + for {set i 0} {$i < $maxtries} {incr i} { + # Build up the candidate name. See also "tempfile". + set directory_name $options(-prefix) + for {set j 0} {$j < $nrand_chars} {incr j} { + append directory_name \ + [string index $chars [expr {int(rand() * 62)}]] + } + append directory_name $options(-suffix) + set path [file join $options(-dir) $directory_name] + + # Try to create. Try again if already exists, or trouble + # with creation and setting of perms. + # + # Note: The last looks as if it is able to leave partial + # directories behind (created, trouble with perms). But + # deleting ... Might pull the rug out from somebody else. + + if {[file exists $path]} continue + if {[catch { + file mkdir $path + if {$::tcl_platform(platform) eq "unix"} { + file attributes $path -permissions 0700 + } + }]} continue + + return $path + } + return -code error "Failed to find an unused temporary directory name" +} + +# ::fileutil::tempfile -- +# +# generate a temporary file name suitable for writing to +# the file name will be unique, writable and will be in the +# appropriate system specific temp directory +# Code taken from http://mini.net/tcl/772 attributed to +# Igor Volobouev and anon. +# +# Arguments: +# prefix - a prefix for the filename, p +# Results: +# returns a file name +# + +proc ::fileutil::tempfile {{prefix {}}} { + return [Normalize [TempFile $prefix]] +} + +proc ::fileutil::TempFile {prefix} { + set tmpdir [tempdir] + + set chars "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + set nrand_chars 10 + set maxtries 10 + set access [list RDWR CREAT EXCL] + set permission 0600 + set channel "" + set checked_dir_writable 0 + + for {set i 0} {$i < $maxtries} {incr i} { + set newname $prefix + for {set j 0} {$j < $nrand_chars} {incr j} { + append newname [string index $chars \ + [expr {int(rand()*62)}]] + } + set newname [file join $tmpdir $newname] + + if {[catch {open $newname $access $permission} channel]} { + if {!$checked_dir_writable} { + set dirname [file dirname $newname] + if {![file writable $dirname]} { + return -code error "Directory $dirname is not writable" + } + set checked_dir_writable 1 + } + } else { + # Success + close $channel + return $newname + } + + } + if {[string compare $channel ""]} { + return -code error "Failed to open a temporary file: $channel" + } else { + return -code error "Failed to find an unused temporary file name" + } +} + +# ::fileutil::install -- +# +# Tcl version of the 'install' command, which copies files from +# one places to another and also optionally sets some attributes +# such as group, owner, and permissions. +# +# Arguments: +# -m Change the file permissions to the specified +# value. Valid arguments are those accepted by +# file attributes -permissions +# +# Results: +# None. + +# TODO - add options for group/owner manipulation. + +proc ::fileutil::install {args} { + set options { + {m.arg "" "Set permission mode"} + } + set usage ": [lindex [info level 0] 0]\ +\[options] source destination \noptions:" + array set params [::cmdline::getoptions args $options $usage] + # Args should now just be the source and destination. + if { [llength $args] < 2 } { + return -code error $usage + } + set src [lindex $args 0] + set dst [lindex $args 1] + file copy -force $src $dst + if { $params(m) != "" } { + set targets [::fileutil::find $dst] + foreach fl $targets { + file attributes $fl -permissions $params(m) + } + } +} + +# ### ### ### ######### ######### ######### + +proc ::fileutil::lexnormalize {sp} { + set spx [file split $sp] + + # Resolution of embedded relative modifiers (., and ..). + + if { + ([lsearch -exact $spx . ] < 0) && + ([lsearch -exact $spx ..] < 0) + } { + # Quick path out if there are no relative modifiers + return $sp + } + + set absolute [expr {![string equal [file pathtype $sp] relative]}] + # A volumerelative path counts as absolute for our purposes. + + set sp $spx + set np {} + set noskip 1 + + while {[llength $sp]} { + set ele [lindex $sp 0] + set sp [lrange $sp 1 end] + set islast [expr {[llength $sp] == 0}] + + if {[string equal $ele ".."]} { + if { + ($absolute && ([llength $np] > 1)) || + (!$absolute && ([llength $np] >= 1)) + } { + # .. : Remove the previous element added to the + # new path, if there actually is enough to remove. + set np [lrange $np 0 end-1] + } + } elseif {[string equal $ele "."]} { + # Ignore .'s, they stay at the current location + continue + } else { + # A regular element. + lappend np $ele + } + } + if {[llength $np] > 0} { + return [eval [linsert $np 0 file join]] + # 8.5: return [file join {*}$np] + } + return {} +} + +# ### ### ### ######### ######### ######### +## Forward compatibility. Some routines require path normalization, +## something we have supported by the builtin 'file' only since Tcl +## 8.4. For versions of Tcl before that, to be supported by the +## module, we implement a normalizer in Tcl itself. Slow, but working. + +if {[package vcompare [package provide Tcl] 8.4] < 0} { + # Pre 8.4. We do not have 'file normalize'. We create an + # approximation for it based on earlier commands. + + # ... Hm. This is lexical normalization. It does not resolve + # symlinks in the path to their origin. + + proc ::fileutil::Normalize {sp} { + set sp [file split $sp] + + # Conversion of the incoming path to absolute. + if {[string equal [file pathtype [lindex $sp 0]] "relative"]} { + set sp [file split [eval [list file join [pwd]] $sp]] + } + + # Resolution of symlink components, and embedded relative + # modifiers (., and ..). + + set np {} + set noskip 1 + while {[llength $sp]} { + set ele [lindex $sp 0] + set sp [lrange $sp 1 end] + set islast [expr {[llength $sp] == 0}] + + if {[string equal $ele ".."]} { + if {[llength $np] > 1} { + # .. : Remove the previous element added to the + # new path, if there actually is enough to remove. + set np [lrange $np 0 end-1] + } + } elseif {[string equal $ele "."]} { + # Ignore .'s, they stay at the current location + continue + } else { + # A regular element. If it is not the last component + # then check if the combination is a symlink, and if + # yes, resolve it. + + lappend np $ele + + if {!$islast && $noskip} { + # The flag 'noskip' is technically not required, + # just 'file exists'. However if a path P does not + # exist, then all longer paths starting with P can + # not exist either, and using the flag to store + # this knowledge then saves us a number of + # unnecessary stat calls. IOW this a performance + # optimization. + + set p [eval file join $np] + set noskip [file exists $p] + if {$noskip} { + if {[string equal link [file type $p]]} { + set dst [file readlink $p] + + # We always push the destination in front of + # the source path (in expanded form). So that + # we handle .., .'s, and symlinks inside of + # this path as well. An absolute path clears + # the result, a relative one just removes the + # last, now resolved component. + + set sp [eval [linsert [file split $dst] 0 linsert $sp 0]] + + if {![string equal relative [file pathtype $dst]]} { + # Absolute|volrelative destination, clear + # result, we have to start over. + set np {} + } else { + # Relative link, just remove the resolved + # component again. + set np [lrange $np 0 end-1] + } + } + } + } + } + } + if {[llength $np] > 0} { + return [eval file join $np] + } + return {} + } +} else { + proc ::fileutil::Normalize {sp} { + file normalize $sp + } +} + +# ::fileutil::relative -- +# +# Taking two _directory_ paths, a base and a destination, computes the path +# of the destination relative to the base. +# +# Arguments: +# base The path to make the destination relative to. +# dst The destination path +# +# Results: +# The path of the destination, relative to the base. + +proc ::fileutil::relative {base dst} { + # Ensure that the link to directory 'dst' is properly done relative to + # the directory 'base'. + + if {![string equal [file pathtype $base] [file pathtype $dst]]} { + return -code error "Unable to compute relation for paths of different pathtypes: [file pathtype $base] vs. [file pathtype $dst], ($base vs. $dst)" + } + + set base [lexnormalize [file join [pwd] $base]] + set dst [lexnormalize [file join [pwd] $dst]] + + set save $dst + set base [file split $base] + set dst [file split $dst] + + while {[string equal [lindex $dst 0] [lindex $base 0]]} { + set dst [lrange $dst 1 end] + set base [lrange $base 1 end] + if {![llength $dst]} {break} + } + + set dstlen [llength $dst] + set baselen [llength $base] + + if {($dstlen == 0) && ($baselen == 0)} { + # Cases: + # (a) base == dst + + set dst . + } else { + # Cases: + # (b) base is: base/sub = sub + # dst is: base = {} + + # (c) base is: base = {} + # dst is: base/sub = sub + + while {$baselen > 0} { + set dst [linsert $dst 0 ..] + incr baselen -1 + } + # 8.5: set dst [file join {*}$dst] + set dst [eval [linsert $dst 0 file join]] + } + + return $dst +} + +# ::fileutil::relativeUrl -- +# +# Taking two _file_ paths, a base and a destination, computes the path +# of the destination relative to the base, from the inside of the base. +# +# This is how a browser resolves relative links in a file, hence the +# url in the command name. +# +# Arguments: +# base The file path to make the destination relative to. +# dst The destination file path +# +# Results: +# The path of the destination file, relative to the base file. + +proc ::fileutil::relativeUrl {base dst} { + # Like 'relative', but for links from _inside_ a file to a + # different file. + + if {![string equal [file pathtype $base] [file pathtype $dst]]} { + return -code error "Unable to compute relation for paths of different pathtypes: [file pathtype $base] vs. [file pathtype $dst], ($base vs. $dst)" + } + + set base [lexnormalize [file join [pwd] $base]] + set dst [lexnormalize [file join [pwd] $dst]] + + set basedir [file dirname $base] + set dstdir [file dirname $dst] + + set dstdir [relative $basedir $dstdir] + + # dstdir == '.' on input => dstdir output has trailing './'. Strip + # this superfluous segment off. + + if {[string equal $dstdir "."]} { + return [file tail $dst] + } elseif {[string equal [file tail $dstdir] "."]} { + return [file join [file dirname $dstdir] [file tail $dst]] + } else { + return [file join $dstdir [file tail $dst]] + } +} + +# ::fileutil::fullnormalize -- +# +# Normalizes a path completely. I.e. a symlink in the last +# element is resolved as well, not only symlinks in the higher +# elements. +# +# Arguments: +# path The path to normalize +# +# Results: +# The input path with all symlinks resolved. + +proc ::fileutil::fullnormalize {path} { + # When encountering symlinks in a file copy operation Tcl copies + # the link, not the contents of the file it references. There are + # situations there this is not acceptable. For these this command + # resolves all symbolic links in the path, including in the last + # element of the path. A "file copy" using the return value of + # this command copies an actual file, it will not encounter + # symlinks. + + # BUG / WORKAROUND. Using the / instead of the join seems to work + # around a bug in the path handling on windows which can break the + # core 'file normalize' for symbolic links. This was exposed by + # the find testsuite which could not reproduced outside. I believe + # that there is some deep path bug in the core triggered under + # special circumstances. Use of / likely forces a refresh through + # the string rep and so avoids the problem with the path intrep. + + return [file dirname [Normalize $path/__dummy__]] + #return [file dirname [Normalize [file join $path __dummy__]]] +} diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/http-2.10b1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/http-2.10b1.tm new file mode 100644 index 00000000..6c3c068c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/http-2.10b1.tm @@ -0,0 +1,5457 @@ +# http.tcl -- +# +# Client-side HTTP for GET, POST, and HEAD commands. These routines can +# be used in untrusted code that uses the Safesock security policy. +# These procedures use a callback interface to avoid using vwait, which +# is not defined in the safe base. +# +# See the file "license.terms" for information on usage and redistribution of +# this file, and for a DISCLAIMER OF ALL WARRANTIES. + +package require Tcl 8.6- +# Keep this in sync with pkgIndex.tcl and with the install directories in +# Makefiles +package provide http 2.10b1 + +namespace eval http { + # Allow resourcing to not clobber existing data + + variable http + if {![info exists http]} { + array set http { + -accept */* + -cookiejar {} + -pipeline 1 + -postfresh 0 + -proxyhost {} + -proxyport {} + -proxyfilter http::ProxyRequired + -proxynot {} + -proxyauth {} + -repost 0 + -threadlevel 0 + -urlencoding utf-8 + -zip 1 + } + # We need a useragent string of this style or various servers will + # refuse to send us compressed content even when we ask for it. This + # follows the de-facto layout of user-agent strings in current browsers. + # Safe interpreters do not have ::tcl_platform(os) or + # ::tcl_platform(osVersion). + if {[interp issafe]} { + set http(-useragent) "Mozilla/5.0\ + (Windows; U;\ + Windows NT 10.0)\ + http/[package provide http] Tcl/[package provide Tcl]" + } else { + set http(-useragent) "Mozilla/5.0\ + ([string totitle $::tcl_platform(platform)]; U;\ + $::tcl_platform(os) $::tcl_platform(osVersion))\ + http/[package provide http] Tcl/[package provide Tcl]" + } + } + + proc init {} { + # Set up the map for quoting chars. RFC3986 Section 2.3 say percent + # encode all except: "... percent-encoded octets in the ranges of + # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period + # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI + # producers ..." + for {set i 0} {$i <= 256} {incr i} { + set c [format %c $i] + if {![string match {[-._~a-zA-Z0-9]} $c]} { + set map($c) %[format %.2X $i] + } + } + # These are handled specially + set map(\n) %0D%0A + variable formMap [array get map] + + # Create a map for HTTP/1.1 open sockets + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + if {[info exists socketMapping]} { + # Close open sockets on re-init. Do not permit retries. + foreach {url sock} [array get socketMapping] { + unset -nocomplain socketClosing($url) + unset -nocomplain socketPlayCmd($url) + CloseSocket $sock + } + } + + # CloseSocket should have unset the socket* arrays, one element at + # a time. Now unset anything that was overlooked. + # Traces on "unset socketRdState(*)" will call CancelReadPipeline and + # cancel any queued responses. + # Traces on "unset socketWrState(*)" will call CancelWritePipeline and + # cancel any queued requests. + array unset socketMapping + array unset socketRdState + array unset socketWrState + array unset socketRdQueue + array unset socketWrQueue + array unset socketPhQueue + array unset socketClosing + array unset socketPlayCmd + array unset socketCoEvent + array unset socketProxyId + array set socketMapping {} + array set socketRdState {} + array set socketWrState {} + array set socketRdQueue {} + array set socketWrQueue {} + array set socketPhQueue {} + array set socketClosing {} + array set socketPlayCmd {} + array set socketCoEvent {} + array set socketProxyId {} + return + } + init + + variable urlTypes + if {![info exists urlTypes]} { + set urlTypes(http) [list 80 ::http::socket] + } + + variable encodings [string tolower [encoding names]] + # This can be changed, but iso8859-1 is the RFC standard. + variable defaultCharset + if {![info exists defaultCharset]} { + set defaultCharset "iso8859-1" + } + + # Force RFC 3986 strictness in geturl url verification? + variable strict + if {![info exists strict]} { + set strict 1 + } + + # Let user control default keepalive for compatibility + variable defaultKeepalive + if {![info exists defaultKeepalive]} { + set defaultKeepalive 0 + } + + # Regular expression used to parse cookies + variable CookieRE {(?x) # EXPANDED SYNTAX + \s* # Ignore leading spaces + ([^][\u0000- ()<>@,;:\\""/?={}\u007f-\uffff]+) # Match the name + = # LITERAL: Equal sign + ([!\u0023-+\u002D-:<-\u005B\u005D-~]*) # Match the value + (?: + \s* ; \s* # LITERAL: semicolon + ([^\u0000]+) # Match the options + )? + } + + variable TmpSockCounter 0 + variable ThreadCounter 0 + + variable reasonDict [dict create {*}{ + 100 Continue + 101 {Switching Protocols} + 102 Processing + 103 {Early Hints} + 200 OK + 201 Created + 202 Accepted + 203 {Non-Authoritative Information} + 204 {No Content} + 205 {Reset Content} + 206 {Partial Content} + 207 Multi-Status + 208 {Already Reported} + 226 {IM Used} + 300 {Multiple Choices} + 301 {Moved Permanently} + 302 Found + 303 {See Other} + 304 {Not Modified} + 305 {Use Proxy} + 306 (Unused) + 307 {Temporary Redirect} + 308 {Permanent Redirect} + 400 {Bad Request} + 401 Unauthorized + 402 {Payment Required} + 403 Forbidden + 404 {Not Found} + 405 {Method Not Allowed} + 406 {Not Acceptable} + 407 {Proxy Authentication Required} + 408 {Request Timeout} + 409 Conflict + 410 Gone + 411 {Length Required} + 412 {Precondition Failed} + 413 {Content Too Large} + 414 {URI Too Long} + 415 {Unsupported Media Type} + 416 {Range Not Satisfiable} + 417 {Expectation Failed} + 418 (Unused) + 421 {Misdirected Request} + 422 {Unprocessable Content} + 423 Locked + 424 {Failed Dependency} + 425 {Too Early} + 426 {Upgrade Required} + 428 {Precondition Required} + 429 {Too Many Requests} + 431 {Request Header Fields Too Large} + 451 {Unavailable For Legal Reasons} + 500 {Internal Server Error} + 501 {Not Implemented} + 502 {Bad Gateway} + 503 {Service Unavailable} + 504 {Gateway Timeout} + 505 {HTTP Version Not Supported} + 506 {Variant Also Negotiates} + 507 {Insufficient Storage} + 508 {Loop Detected} + 510 {Not Extended (OBSOLETED)} + 511 {Network Authentication Required} + }] + + variable failedProxyValues { + binary + body + charset + coding + connection + connectionRespFlag + currentsize + host + http + httpResponse + meta + method + querylength + queryoffset + reasonPhrase + requestHeaders + requestLine + responseCode + state + status + tid + totalsize + transfer + type + } + + namespace export geturl config reset wait formatQuery postError quoteString + namespace export register unregister registerError + namespace export requestLine requestHeaders requestHeaderValue + namespace export responseLine responseHeaders responseHeaderValue + namespace export responseCode responseBody responseInfo reasonPhrase + # - Legacy aliases, were never exported: + # data, code, mapReply, meta, ncode + # - Callable from outside (e.g. from TLS) by fully-qualified name, but + # not exported: + # socket + # - Useful, but never exported (and likely to have naming collisions): + # size, status, cleanup, error, init + # Comments suggest that "init" can be used for re-initialisation, + # although the command is undocumented. + # - Never exported, renamed from lower-case names: + # GetTextLine, MakeTransformationChunked. +} + +# http::Log -- +# +# Debugging output -- define this to observe HTTP/1.1 socket usage. +# Should echo any args received. +# +# Arguments: +# msg Message to output +# +if {[info command http::Log] eq {}} {proc http::Log {args} {}} + +# http::register -- +# +# See documentation for details. +# +# Arguments: +# proto URL protocol prefix, e.g. https +# port Default port for protocol +# command Command to use to create socket +# Results: +# list of port and command that was registered. + +proc http::register {proto port command} { + variable urlTypes + set urlTypes([string tolower $proto]) [list $port $command] +} + +# http::unregister -- +# +# Unregisters URL protocol handler +# +# Arguments: +# proto URL protocol prefix, e.g. https +# Results: +# list of port and command that was unregistered. + +proc http::unregister {proto} { + variable urlTypes + set lower [string tolower $proto] + if {![info exists urlTypes($lower)]} { + return -code error "unsupported url type \"$proto\"" + } + set old $urlTypes($lower) + unset urlTypes($lower) + return $old +} + +# http::config -- +# +# See documentation for details. +# +# Arguments: +# args Options parsed by the procedure. +# Results: +# TODO + +proc http::config {args} { + variable http + set options [lsort [array names http -*]] + set usage [join $options ", "] + if {[llength $args] == 0} { + set result {} + foreach name $options { + lappend result $name $http($name) + } + return $result + } + set options [string map {- ""} $options] + set pat ^-(?:[join $options |])$ + if {[llength $args] == 1} { + set flag [lindex $args 0] + if {![regexp -- $pat $flag]} { + return -code error "Unknown option $flag, must be: $usage" + } + return $http($flag) + } elseif {[llength $args] % 2} { + return -code error "If more than one argument is supplied, the\ + number of arguments must be even" + } else { + foreach {flag value} $args { + if {![regexp -- $pat $flag]} { + return -code error "Unknown option $flag, must be: $usage" + } + if {($flag eq {-threadlevel}) && ($value ni {0 1 2})} { + return -code error {Option -threadlevel must be 0, 1 or 2} + } + set http($flag) $value + } + return + } +} + +# ------------------------------------------------------------------------------ +# Proc http::reasonPhrase +# ------------------------------------------------------------------------------ +# Command to return the IANA-recommended "reason phrase" for a HTTP Status Code. +# Information obtained from: +# https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +# +# Arguments: +# code - A valid HTTP Status Code (integer from 100 to 599) +# +# Return Value: the reason phrase +# ------------------------------------------------------------------------------ + +proc http::reasonPhrase {code} { + variable reasonDict + if {![regexp -- {^[1-5][0-9][0-9]$} $code]} { + set msg {argument must be a three-digit integer from 100 to 599} + return -code error $msg + } + if {[dict exists $reasonDict $code]} { + set reason [dict get $reasonDict $code] + } else { + set reason Unassigned + } + return $reason +} + +# http::Finish -- +# +# Clean up the socket and eval close time callbacks +# +# Arguments: +# token Connection token. +# errormsg (optional) If set, forces status to error. +# skipCB (optional) If set, don't call the -command callback. This +# is useful when geturl wants to throw an exception instead +# of calling the callback. That way, the same error isn't +# reported to two places. +# +# Side Effects: +# May close the socket. + +proc http::Finish {token {errormsg ""} {skipCB 0}} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + global errorInfo errorCode + set closeQueue 0 + if {$errormsg ne ""} { + set state(error) [list $errormsg $errorInfo $errorCode] + set state(status) "error" + } + if {[info commands ${token}--EventCoroutine] ne {}} { + rename ${token}--EventCoroutine {} + } + if {[info commands ${token}--SocketCoroutine] ne {}} { + rename ${token}--SocketCoroutine {} + } + if {[info exists state(socketcoro)]} { + Log $token Cancel socket after-idle event (Finish) + after cancel $state(socketcoro) + unset state(socketcoro) + } + + # Is this an upgrade request/response? + set upgradeResponse \ + [expr { [info exists state(upgradeRequest)] + && $state(upgradeRequest) + && [info exists state(http)] + && ([ncode $token] eq {101}) + && [info exists state(connection)] + && ("upgrade" in $state(connection)) + && [info exists state(upgrade)] + && ("" ne $state(upgrade)) + }] + + if { ($state(status) eq "timeout") + || ($state(status) eq "error") + || ($state(status) eq "eof") + } { + set closeQueue 1 + set connId $state(socketinfo) + if {[info exists state(sock)]} { + set sock $state(sock) + CloseSocket $state(sock) $token + } else { + # When opening the socket and calling http::reset + # immediately, the socket may not yet exist. + # Test http-4.11 may come here. + } + if {$state(tid) ne {}} { + # When opening the socket in a thread, and calling http::reset + # immediately, the thread may still exist. + # Test http-4.11 may come here. + thread::release $state(tid) + set state(tid) {} + } else { + } + } elseif {$upgradeResponse} { + # Special handling for an upgrade request/response. + # - geturl ensures that this is not a "persistent" socket used for + # multiple HTTP requests, so a call to KeepSocket is not needed. + # - Leave socket open, so a call to CloseSocket is not needed either. + # - Remove fileevent bindings. The caller will set its own bindings. + # - THE CALLER MUST PROCESS THE UPGRADED SOCKET IN THE CALLBACK COMMAND + # PASSED TO http::geturl AS -command callback. + catch {fileevent $state(sock) readable {}} + catch {fileevent $state(sock) writable {}} + } elseif { + ([info exists state(-keepalive)] && !$state(-keepalive)) + || ([info exists state(connection)] && ("close" in $state(connection))) + } { + set closeQueue 1 + set connId $state(socketinfo) + if {[info exists state(sock)]} { + set sock $state(sock) + CloseSocket $state(sock) $token + } else { + # When opening the socket and calling http::reset + # immediately, the socket may not yet exist. + # Test http-4.11 may come here. + } + } elseif { + ([info exists state(-keepalive)] && $state(-keepalive)) + && ([info exists state(connection)] && ("close" ni $state(connection))) + } { + KeepSocket $token + } + if {[info exists state(after)]} { + after cancel $state(after) + unset state(after) + } + if {[info exists state(-command)] && (!$skipCB) + && (![info exists state(done-command-cb)])} { + set state(done-command-cb) yes + if { [catch {namespace eval :: $state(-command) $token} err] + && ($errormsg eq "") + } { + set state(error) [list $err $errorInfo $errorCode] + set state(status) error + } + } + + if { $closeQueue + && [info exists socketMapping($connId)] + && ($socketMapping($connId) eq $sock) + } { + http::CloseQueuedQueries $connId $token + # This calls Unset. Other cases do not need the call. + } + return +} + +# http::KeepSocket - +# +# Keep a socket in the persistent sockets table and connect it to its next +# queued task if possible. Otherwise leave it idle and ready for its next +# use. +# +# If $socketClosing(*), then ("close" in $state(connection)) and therefore +# this command will not be called by Finish. +# +# Arguments: +# token Connection token. + +proc http::KeepSocket {token} { + variable http + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + # Keep this socket open for another request ("Keep-Alive"). + # React if the server half-closes the socket. + # Discussion is in http::geturl. + catch {fileevent $state(sock) readable [list http::CheckEof $state(sock)]} + + # The line below should not be changed in production code. + # It is edited by the test suite. + set TEST_EOF 0 + if {$TEST_EOF} { + # ONLY for testing reaction to server eof. + # No server timeouts will be caught. + catch {fileevent $state(sock) readable {}} + } + + if { [info exists state(socketinfo)] + && [info exists socketMapping($state(socketinfo))] + } { + set connId $state(socketinfo) + # The value "Rready" is set only here. + set socketRdState($connId) Rready + + if { $state(-pipeline) + && [info exists socketRdQueue($connId)] + && [llength $socketRdQueue($connId)] + } { + # The usual case for pipelined responses - if another response is + # queued, arrange to read it. + set token3 [lindex $socketRdQueue($connId) 0] + set socketRdQueue($connId) [lrange $socketRdQueue($connId) 1 end] + + #Log pipelined, GRANT read access to $token3 in KeepSocket + set socketRdState($connId) $token3 + ReceiveResponse $token3 + + # Other pipelined cases. + # - The test above ensures that, for the pipelined cases in the two + # tests below, the read queue is empty. + # - In those two tests, check whether the next write will be + # nonpipeline. + } elseif { + $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "peNding") + + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && (![set token3 [lindex $socketWrQueue($connId) 0] + set ${token3}(-pipeline) + ] + ) + } { + # This case: + # - Now it the time to run the "pending" request. + # - The next token in the write queue is nonpipeline, and + # socketWrState has been marked "pending" (in + # http::NextPipelinedWrite or http::geturl) so a new pipelined + # request cannot jump the queue. + # + # Tests: + # - In this case the read queue (tested above) is empty and this + # "pending" write token is in front of the rest of the write + # queue. + # - The write state is not Wready and therefore appears to be busy, + # but because it is "pending" we know that it is reserved for the + # first item in the write queue, a non-pipelined request that is + # waiting for the read queue to empty. That has now happened: so + # give that request read and write access. + set conn [set ${token3}(connArgs)] + #Log nonpipeline, GRANT r/w access to $token3 in KeepSocket + set socketRdState($connId) $token3 + set socketWrState($connId) $token3 + set socketWrQueue($connId) [lrange $socketWrQueue($connId) 1 end] + # Connect does its own fconfigure. + fileevent $state(sock) writable [list http::Connect $token3 {*}$conn] + #Log ---- $state(sock) << conn to $token3 for HTTP request (c) + + } elseif { + $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "peNding") + + } { + # Should not come here. The second block in the previous "elseif" + # test should be tautologous (but was needed in an earlier + # implementation) and will be removed after testing. + # If we get here, the value "pending" was assigned in error. + # This error would block the queue for ever. + Log ^X$tk <<<<< Error in queueing of requests >>>>> - token $token + + } elseif { + $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "Wready") + + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && (![set token3 [lindex $socketWrQueue($connId) 0] + set ${token3}(-pipeline) + ] + ) + } { + # This case: + # - The next token in the write queue is nonpipeline, and + # socketWrState is Wready. Get the next event from socketWrQueue. + # Tests: + # - In this case the read state (tested above) is Rready and the + # write state (tested here) is Wready - there is no "pending" + # request. + # Code: + # - The code is the same as the code below for the nonpipelined + # case with a queued request. + set conn [set ${token3}(connArgs)] + #Log nonpipeline, GRANT r/w access to $token3 in KeepSocket + set socketRdState($connId) $token3 + set socketWrState($connId) $token3 + set socketWrQueue($connId) [lrange $socketWrQueue($connId) 1 end] + # Connect does its own fconfigure. + fileevent $state(sock) writable [list http::Connect $token3 {*}$conn] + #Log ---- $state(sock) << conn to $token3 for HTTP request (c) + + } elseif { + (!$state(-pipeline)) + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && ("close" ni $state(connection)) + } { + # If not pipelined, (socketRdState eq Rready) tells us that we are + # ready for the next write - there is no need to check + # socketWrState. Write the next request, if one is waiting. + # If the next request is pipelined, it receives premature read + # access to the socket. This is not a problem. + set token3 [lindex $socketWrQueue($connId) 0] + set conn [set ${token3}(connArgs)] + #Log nonpipeline, GRANT r/w access to $token3 in KeepSocket + set socketRdState($connId) $token3 + set socketWrState($connId) $token3 + set socketWrQueue($connId) [lrange $socketWrQueue($connId) 1 end] + # Connect does its own fconfigure. + fileevent $state(sock) writable [list http::Connect $token3 {*}$conn] + #Log ---- $state(sock) << conn to $token3 for HTTP request (d) + + } elseif {(!$state(-pipeline))} { + set socketWrState($connId) Wready + # Rready and Wready and idle: nothing to do. + } + + } else { + CloseSocket $state(sock) $token + # There is no socketMapping($state(socketinfo)), so it does not matter + # that CloseQueuedQueries is not called. + } + return +} + +# http::CheckEof - +# +# Read from a socket and close it if eof. +# The command is bound to "fileevent readable" on an idle socket, and +# "eof" is the only event that should trigger the binding, occurring when +# the server times out and half-closes the socket. +# +# A read is necessary so that [eof] gives a meaningful result. +# Any bytes sent are junk (or a bug). + +proc http::CheckEof {sock} { + set junk [read $sock] + set n [string length $junk] + if {$n} { + Log "WARNING: $n bytes received but no HTTP request sent" + } + + if {[catch {eof $sock} res] || $res} { + # The server has half-closed the socket. + # If a new write has started, its transaction will fail and + # will then be error-handled. + CloseSocket $sock + } + return +} + +# http::CloseSocket - +# +# Close a socket and remove it from the persistent sockets table. If +# possible an http token is included here but when we are called from a +# fileevent on remote closure we need to find the correct entry - hence +# the "else" block of the first "if" command. + +proc http::CloseSocket {s {token {}}} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + set tk [namespace tail $token] + + catch {fileevent $s readable {}} + set connId {} + if {$token ne ""} { + variable $token + upvar 0 $token state + if {[info exists state(socketinfo)]} { + set connId $state(socketinfo) + } + } else { + set map [array get socketMapping] + set ndx [lsearch -exact $map $s] + if {$ndx >= 0} { + incr ndx -1 + set connId [lindex $map $ndx] + } + } + if { ($connId ne {}) + && [info exists socketMapping($connId)] + && ($socketMapping($connId) eq $s) + } { + Log "Closing connection $connId (sock $socketMapping($connId))" + if {[catch {close $socketMapping($connId)} err]} { + Log "Error closing connection: $err" + } else { + } + if {$token eq {}} { + # Cases with a non-empty token are handled by Finish, so the tokens + # are finished in connection order. + http::CloseQueuedQueries $connId + } else { + } + } else { + Log "Closing socket $s (no connection info)" + if {[catch {close $s} err]} { + Log "Error closing socket: $err" + } else { + } + } + return +} + +# http::CloseQueuedQueries +# +# connId - identifier "domain:port" for the connection +# token - (optional) used only for logging +# +# Called from http::CloseSocket and http::Finish, after a connection is closed, +# to clear the read and write queues if this has not already been done. + +proc http::CloseQueuedQueries {connId {token {}}} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + ##Log CloseQueuedQueries $connId $token + if {![info exists socketMapping($connId)]} { + # Command has already been called. + # Don't come here again - especially recursively. + return + } + + # Used only for logging. + if {$token eq {}} { + set tk {} + } else { + set tk [namespace tail $token] + } + + if { [info exists socketPlayCmd($connId)] + && ($socketPlayCmd($connId) ne {ReplayIfClose Wready {} {}}) + } { + # Before unsetting, there is some unfinished business. + # - If the server sent "Connection: close", we have stored the command + # for retrying any queued requests in socketPlayCmd, so copy that + # value for execution below. socketClosing(*) was also set. + # - Also clear the queues to prevent calls to Finish that would set the + # state for the requests that will be retried to "finished with error + # status". + # - At this stage socketPhQueue is empty. + set unfinished $socketPlayCmd($connId) + set socketRdQueue($connId) {} + set socketWrQueue($connId) {} + } else { + set unfinished {} + } + + Unset $connId + + if {$unfinished ne {}} { + Log ^R$tk Any unfinished transactions (excluding $token) failed \ + - token $token - unfinished $unfinished + {*}$unfinished + # Calls ReplayIfClose. + } + return +} + +# http::Unset +# +# The trace on "unset socketRdState(*)" will call CancelReadPipeline +# and cancel any queued responses. +# The trace on "unset socketWrState(*)" will call CancelWritePipeline +# and cancel any queued requests. + +proc http::Unset {connId} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + unset socketMapping($connId) + unset socketRdState($connId) + unset socketWrState($connId) + unset -nocomplain socketRdQueue($connId) + unset -nocomplain socketWrQueue($connId) + unset -nocomplain socketClosing($connId) + unset -nocomplain socketPlayCmd($connId) + unset -nocomplain socketProxyId($connId) + return +} + +# http::reset -- +# +# See documentation for details. +# +# Arguments: +# token Connection token. +# why Status info. +# +# Side Effects: +# See Finish + +proc http::reset {token {why reset}} { + variable $token + upvar 0 $token state + set state(status) $why + catch {fileevent $state(sock) readable {}} + catch {fileevent $state(sock) writable {}} + Finish $token + if {[info exists state(error)]} { + set errorlist $state(error) + unset state + eval ::error $errorlist + # i.e. error msg errorInfo errorCode + } + return +} + +# http::geturl -- +# +# Establishes a connection to a remote url via http. +# +# Arguments: +# url The http URL to goget. +# args Option value pairs. Valid options include: +# -blocksize, -validate, -headers, -timeout +# Results: +# Returns a token for this connection. This token is the name of an +# array that the caller should unset to garbage collect the state. + +proc http::geturl {url args} { + variable urlTypes + + # - If ::tls::socketCmd has its default value "::socket", change it to the + # new value ::http::socketForTls. + # - If the old value is different, then it has been modified either by the + # script or by the Tcl installation, and replaced by a new command. The + # script or installation that modified ::tls::socketCmd is also + # responsible for integrating ::http::socketForTls into its own "new" + # command, if it wishes to do so. + # - Commands that open a socket: + # - ::socket - basic + # - ::http::socket - can use a thread to avoid blockage by slow DNS + # lookup. See http::config option -threadlevel. + # - ::http::socketForTls - as ::http::socket, but can also open a socket + # for HTTPS/TLS through a proxy. + + if {[info exists ::tls::socketCmd] && ($::tls::socketCmd eq {::socket})} { + set ::tls::socketCmd ::http::socketForTls + } + + set token [CreateToken $url {*}$args] + variable $token + upvar 0 $token state + + AsyncTransaction $token + + # -------------------------------------------------------------------------- + # Synchronous Call to http::geturl + # -------------------------------------------------------------------------- + # - If the call to http::geturl is asynchronous, it is now complete (apart + # from delivering the return value). + # - If the call to http::geturl is synchronous, the command must now wait + # for the HTTP transaction to be completed. The call to http::wait uses + # vwait, which may be inappropriate if the caller makes other HTTP + # requests in the background. + # -------------------------------------------------------------------------- + + if {![info exists state(-command)]} { + # geturl does EVERYTHING asynchronously, so if the user + # calls it synchronously, we just do a wait here. + http::wait $token + + if {![info exists state]} { + # If we timed out then Finish has been called and the users + # command callback may have cleaned up the token. If so we end up + # here with nothing left to do. + return $token + } elseif {$state(status) eq "error"} { + # Something went wrong while trying to establish the connection. + # Clean up after events and such, but DON'T call the command + # callback (if available) because we're going to throw an + # exception from here instead. + set err [lindex $state(error) 0] + cleanup $token + return -code error $err + } + } + + return $token +} + +# ------------------------------------------------------------------------------ +# Proc http::CreateToken +# ------------------------------------------------------------------------------ +# Command to convert arguments into an initialised request token. +# The return value is the variable name of the token. +# +# Other effects: +# - Sets ::http::http(usingThread) if not already done +# - Sets ::http::http(uid) if not already done +# - Increments ::http::http(uid) +# - May increment ::http::TmpSockCounter +# - Alters ::http::socketPlayCmd, ::http::socketWrQueue if a -keepalive 1 +# request is appended to the queue of a persistent socket that is already +# scheduled to close. +# This also sets state(alreadyQueued) to 1. +# - Alters ::http::socketPhQueue if a -keepalive 1 request is appended to the +# queue of a persistent socket that has not yet been created (and is therefore +# represented by a placeholder). +# This also sets state(ReusingPlaceholder) to 1. +# ------------------------------------------------------------------------------ + +proc http::CreateToken {url args} { + variable http + variable urlTypes + variable defaultCharset + variable defaultKeepalive + variable strict + variable TmpSockCounter + + # Initialize the state variable, an array. We'll return the name of this + # array as the token for the transaction. + + if {![info exists http(usingThread)]} { + set http(usingThread) 0 + } + if {![info exists http(uid)]} { + set http(uid) 0 + } + set token [namespace current]::[incr http(uid)] + ##Log Starting http::geturl - token $token + variable $token + upvar 0 $token state + set tk [namespace tail $token] + reset $token + Log ^A$tk URL $url - token $token + + # Process command options. + + array set state { + -binary false + -blocksize 8192 + -queryblocksize 8192 + -validate 0 + -headers {} + -timeout 0 + -type application/x-www-form-urlencoded + -queryprogress {} + -protocol 1.1 + -guesstype 0 + binary 0 + state created + meta {} + method {} + coding {} + currentsize 0 + totalsize 0 + querylength 0 + queryoffset 0 + type application/octet-stream + body {} + status "" + http "" + httpResponse {} + responseCode {} + reasonPhrase {} + connection keep-alive + tid {} + requestHeaders {} + requestLine {} + transfer {} + proxyUsed none + } + set state(-keepalive) $defaultKeepalive + set state(-strict) $strict + # These flags have their types verified [Bug 811170] + array set type { + -binary boolean + -blocksize integer + -guesstype boolean + -queryblocksize integer + -strict boolean + -timeout integer + -validate boolean + -headers list + } + set state(charset) $defaultCharset + set options { + -binary -blocksize -channel -command -guesstype -handler -headers -keepalive + -method -myaddr -progress -protocol -query -queryblocksize + -querychannel -queryprogress -strict -timeout -type -validate + } + set usage [join [lsort $options] ", "] + set options [string map {- ""} $options] + set pat ^-(?:[join $options |])$ + foreach {flag value} $args { + if {[regexp -- $pat $flag]} { + # Validate numbers + if { [info exists type($flag)] + && (![string is $type($flag) -strict $value]) + } { + unset $token + return -code error \ + "Bad value for $flag ($value), must be $type($flag)" + } + if {($flag eq "-headers") && ([llength $value] % 2 != 0)} { + unset $token + return -code error "Bad value for $flag ($value), number\ + of list elements must be even" + } + set state($flag) $value + } else { + unset $token + return -code error "Unknown option $flag, can be: $usage" + } + } + + # Make sure -query and -querychannel aren't both specified + + set isQueryChannel [info exists state(-querychannel)] + set isQuery [info exists state(-query)] + if {$isQuery && $isQueryChannel} { + unset $token + return -code error "Can't combine -query and -querychannel options!" + } + + # Validate URL, determine the server host and port, and check proxy case + # Recognize user:pass@host URLs also, although we do not do anything with + # that info yet. + + # URLs have basically four parts. + # First, before the colon, is the protocol scheme (e.g. http) + # Second, for HTTP-like protocols, is the authority + # The authority is preceded by // and lasts up to (but not including) + # the following / or ? and it identifies up to four parts, of which + # only one, the host, is required (if an authority is present at all). + # All other parts of the authority (user name, password, port number) + # are optional. + # Third is the resource name, which is split into two parts at a ? + # The first part (from the single "/" up to "?") is the path, and the + # second part (from that "?" up to "#") is the query. *HOWEVER*, we do + # not need to separate them; we send the whole lot to the server. + # Both, path and query are allowed to be missing, including their + # delimiting character. + # Fourth is the fragment identifier, which is everything after the first + # "#" in the URL. The fragment identifier MUST NOT be sent to the server + # and indeed, we don't bother to validate it (it could be an error to + # pass it in here, but it's cheap to strip). + # + # An example of a URL that has all the parts: + # + # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes + # + # The "http" is the protocol, the user is "jschmoe", the password is + # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is + # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". + # + # Note that the RE actually combines the user and password parts, as + # recommended in RFC 3986. Indeed, that RFC states that putting passwords + # in URLs is a Really Bad Idea, something with which I would agree utterly. + # RFC 9110 Sec 4.2.4 goes further than this, and deprecates the format + # "user:password@". It is retained here for backward compatibility, + # but its use is not recommended. + # + # From a validation perspective, we need to ensure that the parts of the + # URL that are going to the server are correctly encoded. This is only + # done if $state(-strict) is true (inherited from $::http::strict). + + set URLmatcher {(?x) # this is _expanded_ syntax + ^ + (?: (\w+) : ) ? # + (?: // + (?: + ( + [^@/\#?]+ # + ) @ + )? + ( # + [^/:\#?]+ | # host name or IPv4 address + \[ [^/\#?]+ \] # IPv6 address in square brackets + ) + (?: : (\d+) )? # + )? + ( [/\?] [^\#]*)? # (including query) + (?: \# (.*) )? # + $ + } + + # Phase one: parse + if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { + unset $token + return -code error "Unsupported URL: $url" + } + # Phase two: validate + set host [string trim $host {[]}]; # strip square brackets from IPv6 address + if {$host eq ""} { + # Caller has to provide a host name; we do not have a "default host" + # that would enable us to handle relative URLs. + unset $token + return -code error "Missing host part: $url" + # Note that we don't check the hostname for validity here; if it's + # invalid, we'll simply fail to resolve it later on. + } + if {$port ne "" && $port > 65535} { + unset $token + return -code error "Invalid port number: $port" + } + # The user identification and resource identification parts of the URL can + # have encoded characters in them; take care! + if {$user ne ""} { + # Check for validity according to RFC 3986, Appendix A + set validityRE {(?xi) + ^ + (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ + $ + } + if {$state(-strict) && ![regexp -- $validityRE $user]} { + unset $token + # Provide a better error message in this error case + if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { + return -code error \ + "Illegal encoding character usage \"$bad\" in URL user" + } + return -code error "Illegal characters in URL user" + } + } + if {$srvurl ne ""} { + # RFC 3986 allows empty paths (not even a /), but servers + # return 400 if the path in the HTTP request doesn't start + # with / , so add it here if needed. + if {[string index $srvurl 0] ne "/"} { + set srvurl /$srvurl + } + # Check for validity according to RFC 3986, Appendix A + set validityRE {(?xi) + ^ + # Path part (already must start with / character) + (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* + # Query part (optional, permits ? characters) + (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? + $ + } + if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { + unset $token + # Provide a better error message in this error case + if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { + return -code error \ + "Illegal encoding character usage \"$bad\" in URL path" + } + return -code error "Illegal characters in URL path" + } + if {![regexp {^[^?#]+} $srvurl state(path)]} { + set state(path) / + } + } else { + set srvurl / + set state(path) / + } + if {$proto eq ""} { + set proto http + } + set lower [string tolower $proto] + if {![info exists urlTypes($lower)]} { + unset $token + return -code error "Unsupported URL type \"$proto\"" + } + set defport [lindex $urlTypes($lower) 0] + set defcmd [lindex $urlTypes($lower) 1] + + if {$port eq ""} { + set port $defport + } + if {![catch {$http(-proxyfilter) $host} proxy]} { + set phost [lindex $proxy 0] + set pport [lindex $proxy 1] + } else { + set phost {} + set pport {} + } + + # OK, now reassemble into a full URL + set url ${proto}:// + if {$user ne ""} { + append url $user + append url @ + } + append url $host + if {$port != $defport} { + append url : $port + } + append url $srvurl + # Don't append the fragment! RFC 7230 Sec 5.1 + set state(url) $url + + # Proxy connections aren't shared among different hosts. + set state(socketinfo) $host:$port + + # Save the accept types at this point to prevent a race condition. [Bug + # c11a51c482] + set state(accept-types) $http(-accept) + + # Check whether this is an Upgrade request. + set connectionValues [SplitCommaSeparatedFieldValue \ + [GetFieldValue $state(-headers) Connection]] + set connectionValues [string tolower $connectionValues] + set upgradeValues [SplitCommaSeparatedFieldValue \ + [GetFieldValue $state(-headers) Upgrade]] + set state(upgradeRequest) [expr { "upgrade" in $connectionValues + && [llength $upgradeValues] >= 1}] + set state(connectionValues) $connectionValues + + if {$isQuery || $isQueryChannel} { + # It's a POST. + # A client wishing to send a non-idempotent request SHOULD wait to send + # that request until it has received the response status for the + # previous request. + if {$http(-postfresh)} { + # Override -keepalive for a POST. Use a new connection, and thus + # avoid the small risk of a race against server timeout. + set state(-keepalive) 0 + } else { + # Allow -keepalive but do not -pipeline - wait for the previous + # transaction to finish. + # There is a small risk of a race against server timeout. + set state(-pipeline) 0 + } + } elseif {$state(upgradeRequest)} { + # It's an upgrade request. Method must be GET (untested). + # Force -keepalive to 0 so the connection is not made over a persistent + # socket, i.e. one used for multiple HTTP requests. + set state(-keepalive) 0 + } else { + # It's a non-upgrade GET or HEAD. + set state(-pipeline) $http(-pipeline) + } + + # We cannot handle chunked encodings with -handler, so force HTTP/1.0 + # until we can manage this. + if {[info exists state(-handler)]} { + set state(-protocol) 1.0 + } + + # RFC 7320 A.1 - HTTP/1.0 Keep-Alive is problematic. We do not support it. + if {$state(-protocol) eq "1.0"} { + set state(connection) close + set state(-keepalive) 0 + } + + # Handle proxy requests here for http:// but not for https:// + # The proxying for https is done in the ::http::socketForTls command. + # A proxy request for http:// needs the full URL in the HTTP request line, + # including the server name. + # The *tls* test below attempts to describe protocols in addition to + # "https on port 443" that use HTTP over TLS. + if {($phost ne "") && (![string match -nocase *tls* $defcmd])} { + set srvurl $url + set targetAddr [list $phost $pport] + set state(proxyUsed) HttpProxy + # The value of state(proxyUsed) none|HttpProxy depends only on the + # all-transactions http::config settings and on the target URL. + # Even if this is a persistent socket there is no need to change the + # value of state(proxyUsed) for other transactions that use the socket: + # they have the same value already. + } else { + set targetAddr [list $host $port] + } + + set sockopts [list -async] + + # Pass -myaddr directly to the socket command + if {[info exists state(-myaddr)]} { + lappend sockopts -myaddr $state(-myaddr) + } + + set state(connArgs) [list $proto $phost $srvurl] + set state(openCmd) [list {*}$defcmd {*}$sockopts -type $token {*}$targetAddr] + + # See if we are supposed to use a previously opened channel. + # - In principle, ANY call to http::geturl could use a previously opened + # channel if it is available - the "Connection: keep-alive" header is a + # request to leave the channel open AFTER completion of this call. + # - In fact, we try to use an existing channel only if -keepalive 1 -- this + # means that at most one channel is left open for each value of + # $state(socketinfo). This property simplifies the mapping of open + # channels. + set reusing 0 + set state(alreadyQueued) 0 + set state(ReusingPlaceholder) 0 + if {$state(-keepalive)} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + if {[info exists socketMapping($state(socketinfo))]} { + # - If the connection is idle, it has a "fileevent readable" binding + # to http::CheckEof, in case the server times out and half-closes + # the socket (http::CheckEof closes the other half). + # - We leave this binding in place until just before the last + # puts+flush in http::Connected (GET/HEAD) or http::Write (POST), + # after which the HTTP response might be generated. + + if { [info exists socketClosing($state(socketinfo))] + && $socketClosing($state(socketinfo)) + } { + # socketClosing(*) is set because the server has sent a + # "Connection: close" header. + # Do not use the persistent socket again. + # Since we have only one persistent socket per server, and the + # old socket is not yet dead, add the request to the write queue + # of the dying socket, which will be replayed by ReplayIfClose. + # Also add it to socketWrQueue(*) which is used only if an error + # causes a call to Finish. + set reusing 1 + set sock $socketMapping($state(socketinfo)) + set state(proxyUsed) $socketProxyId($state(socketinfo)) + Log "reusing closing socket $sock for $state(socketinfo) - token $token" + + set state(alreadyQueued) 1 + lassign $socketPlayCmd($state(socketinfo)) com0 com1 com2 com3 + lappend com3 $token + set socketPlayCmd($state(socketinfo)) [list $com0 $com1 $com2 $com3] + lappend socketWrQueue($state(socketinfo)) $token + ##Log socketPlayCmd($state(socketinfo)) is $socketPlayCmd($state(socketinfo)) + ##Log socketWrQueue($state(socketinfo)) is $socketWrQueue($state(socketinfo)) + } elseif { + [catch {fconfigure $socketMapping($state(socketinfo))}] + && (![SockIsPlaceHolder $socketMapping($state(socketinfo))]) + } { + ###Log "Socket $socketMapping($state(socketinfo)) for $state(socketinfo)" + # FIXME Is it still possible for this code to be executed? If + # so, this could be another place to call TestForReplay, + # rather than discarding the queued transactions. + Log "WARNING: socket for $state(socketinfo) was closed\ + - token $token" + Log "WARNING - if testing, pay special attention to this\ + case (GH) which is seldom executed - token $token" + + # This will call CancelReadPipeline, CancelWritePipeline, and + # cancel any queued requests, responses. + Unset $state(socketinfo) + } else { + # Use the persistent socket. + # - The socket may not be ready to write: an earlier request might + # still be still writing (in the pipelined case) or + # writing/reading (in the nonpipeline case). This possibility + # is handled by socketWrQueue later in this command. + # - The socket may not yet exist, and be defined with a placeholder. + set reusing 1 + set sock $socketMapping($state(socketinfo)) + set state(proxyUsed) $socketProxyId($state(socketinfo)) + if {[SockIsPlaceHolder $sock]} { + set state(ReusingPlaceholder) 1 + lappend socketPhQueue($sock) $token + } else { + } + Log "reusing open socket $sock for $state(socketinfo) - token $token" + } + # Do not automatically close the connection socket. + set state(connection) keep-alive + } + } + + set state(reusing) $reusing + unset reusing + + if {![info exists sock]} { + # N.B. At this point ([info exists sock] == $state(reusing)). + # This will no longer be true after we set a value of sock here. + # Give the socket a placeholder name. + set sock HTTP_PLACEHOLDER_[incr TmpSockCounter] + } + set state(sock) $sock + + if {$state(reusing)} { + # Define these for use (only) by http::ReplayIfDead if the persistent + # connection has died. + set state(tmpConnArgs) $state(connArgs) + set state(tmpState) [array get state] + set state(tmpOpenCmd) $state(openCmd) + } + return $token +} + + +# ------------------------------------------------------------------------------ +# Proc ::http::SockIsPlaceHolder +# ------------------------------------------------------------------------------ +# Command to return 0 if the argument is a genuine socket handle, or 1 if is a +# placeholder value generated by geturl or ReplayCore before the real socket is +# created. +# +# Arguments: +# sock - either a valid socket handle or a placeholder value +# +# Return Value: 0 or 1 +# ------------------------------------------------------------------------------ + +proc http::SockIsPlaceHolder {sock} { + expr {[string range $sock 0 16] eq {HTTP_PLACEHOLDER_}} +} + + +# ------------------------------------------------------------------------------ +# state(reusing) +# ------------------------------------------------------------------------------ +# - state(reusing) is set by geturl, ReplayCore +# - state(reusing) is used by geturl, AsyncTransaction, OpenSocket, +# ConfigureNewSocket, and ScheduleRequest when creating and configuring the +# connection. +# - state(reusing) is used by Connect, Connected, Event x 2 when deciding +# whether to call TestForReplay. +# - Other places where state(reusing) is used: +# - Connected - if reusing and not pipelined, start the state(-timeout) +# timeout (when writing). +# - DoneRequest - if reusing and pipelined, send the next pipelined write +# - Event - if reusing and pipelined, start the state(-timeout) +# timeout (when reading). +# - Event - if (not reusing) and pipelined, send the next pipelined +# write. +# ------------------------------------------------------------------------------ + + +# ------------------------------------------------------------------------------ +# Proc http::AsyncTransaction +# ------------------------------------------------------------------------------ +# This command is called by geturl and ReplayCore to prepare the HTTP +# transaction prescribed by a suitably prepared token. +# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::AsyncTransaction {token} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + set sock $state(sock) + + # See comments above re the start of this timeout in other cases. + if {(!$state(reusing)) && ($state(-timeout) > 0)} { + set state(after) [after $state(-timeout) \ + [list http::reset $token timeout]] + } + + if { $state(-keepalive) + && (![info exists socketMapping($state(socketinfo))]) + } { + # This code is executed only for the first -keepalive request on a + # socket. It makes the socket persistent. + ##Log " PreparePersistentConnection" $token -- $sock -- DO + set DoLater [PreparePersistentConnection $token] + } else { + ##Log " PreparePersistentConnection" $token -- $sock -- SKIP + set DoLater {-traceread 0 -tracewrite 0} + } + + if {$state(ReusingPlaceholder)} { + # - This request was added to the socketPhQueue of a persistent + # connection. + # - But the connection has not yet been created and is a placeholder; + # - And the placeholder was created by an earlier request. + # - When that earlier request calls OpenSocket, its placeholder is + # replaced with a true socket, and it then executes the equivalent of + # OpenSocket for any subsequent requests that have + # $state(ReusingPlaceholder). + Log >J$tk after idle coro NO - ReusingPlaceholder + } elseif {$state(alreadyQueued)} { + # - This request was added to the socketWrQueue and socketPlayCmd + # of a persistent connection that will close at the end of its current + # read operation. + Log >J$tk after idle coro NO - alreadyQueued + } else { + Log >J$tk after idle coro YES + set CoroName ${token}--SocketCoroutine + set cancel [after idle [list coroutine $CoroName ::http::OpenSocket \ + $token $DoLater]] + dict set socketCoEvent($state(socketinfo)) $token $cancel + set state(socketcoro) $cancel + } + + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::PreparePersistentConnection +# ------------------------------------------------------------------------------ +# This command is called by AsyncTransaction to initialise a "persistent +# connection" based upon a socket placeholder. It is called the first time the +# socket is associated with a "-keepalive" request. +# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: - DoLater, a dictionary of boolean values listing unfinished +# tasks; to be passed to ConfigureNewSocket via OpenSocket. +# ------------------------------------------------------------------------------ + +proc http::PreparePersistentConnection {token} { + variable $token + upvar 0 $token state + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + set DoLater {-traceread 0 -tracewrite 0} + set socketMapping($state(socketinfo)) $state(sock) + set socketProxyId($state(socketinfo)) $state(proxyUsed) + # - The value of state(proxyUsed) was set in http::CreateToken to either + # "none" or "HttpProxy". + # - $token is the first transaction to use this placeholder, so there are + # no other tokens whose (proxyUsed) must be modified. + + if {![info exists socketRdState($state(socketinfo))]} { + set socketRdState($state(socketinfo)) {} + # set varName ::http::socketRdState($state(socketinfo)) + # trace add variable $varName unset ::http::CancelReadPipeline + dict set DoLater -traceread 1 + } + if {![info exists socketWrState($state(socketinfo))]} { + set socketWrState($state(socketinfo)) {} + # set varName ::http::socketWrState($state(socketinfo)) + # trace add variable $varName unset ::http::CancelWritePipeline + dict set DoLater -tracewrite 1 + } + + if {$state(-pipeline)} { + #Log new, init for pipelined, GRANT write access to $token in geturl + # Also grant premature read access to the socket. This is OK. + set socketRdState($state(socketinfo)) $token + set socketWrState($state(socketinfo)) $token + } else { + # socketWrState is not used by this non-pipelined transaction. + # We cannot leave it as "Wready" because the next call to + # http::geturl with a pipelined transaction would conclude that the + # socket is available for writing. + #Log new, init for nonpipeline, GRANT r/w access to $token in geturl + set socketRdState($state(socketinfo)) $token + set socketWrState($state(socketinfo)) $token + } + + # Value of socketPhQueue() may have already been set by ReplayCore. + if {![info exists socketPhQueue($state(sock))]} { + set socketPhQueue($state(sock)) {} + } + set socketRdQueue($state(socketinfo)) {} + set socketWrQueue($state(socketinfo)) {} + set socketClosing($state(socketinfo)) 0 + set socketPlayCmd($state(socketinfo)) {ReplayIfClose Wready {} {}} + set socketCoEvent($state(socketinfo)) {} + set socketProxyId($state(socketinfo)) {} + + return $DoLater +} + +# ------------------------------------------------------------------------------ +# Proc ::http::OpenSocket +# ------------------------------------------------------------------------------ +# This command is called as a coroutine idletask to start the asynchronous HTTP +# transaction in most cases. For the exceptions, see the calling code in +# command AsyncTransaction. +# +# Arguments: +# token - connection token (name of an array) +# DoLater - dictionary of boolean values listing unfinished tasks +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::OpenSocket {token DoLater} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + Log >K$tk Start OpenSocket coroutine + + if {![info exists state(-keepalive)]} { + # The request has already been cancelled by the calling script. + return + } + + set sockOld $state(sock) + + dict unset socketCoEvent($state(socketinfo)) $token + unset -nocomplain state(socketcoro) + + if {[catch { + if {$state(reusing)} { + # If ($state(reusing)) is true, then we do not need to create a new + # socket, even if $sockOld is only a placeholder for a socket. + set sock $sockOld + } else { + # set sock in the [catch] below. + set pre [clock milliseconds] + ##Log pre socket opened, - token $token + ##Log $state(openCmd) - token $token + set sock [namespace eval :: $state(openCmd)] + set state(sock) $sock + # Normal return from $state(openCmd) always returns a valid socket. + # A TLS proxy connection with 407 or other failure from the + # proxy server raises an error. + + # Initialisation of a new socket. + ##Log post socket opened, - token $token + ##Log socket opened, now fconfigure - token $token + set delay [expr {[clock milliseconds] - $pre}] + if {$delay > 3000} { + Log socket delay $delay - token $token + } + fconfigure $sock -translation {auto crlf} \ + -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 + } + ##Log socket opened, DONE fconfigure - token $token + } + + Log "Using $sock for $state(socketinfo) - token $token" \ + [expr {$state(-keepalive)?"keepalive":""}] + + # Code above has set state(sock) $sock + ConfigureNewSocket $token $sockOld $DoLater + ##Log OpenSocket success $sock - token $token + } result errdict]} { + ##Log OpenSocket failed $result - token $token + # There may be other requests in the socketPhQueue. + # Prepare socketPlayCmd so that Finish will replay them. + if { ($state(-keepalive)) && (!$state(reusing)) + && [info exists socketPhQueue($sockOld)] + && ($socketPhQueue($sockOld) ne {}) + } { + if {$socketMapping($state(socketinfo)) ne $sockOld} { + Log "WARNING: this code should not be reached.\ + {$socketMapping($state(socketinfo)) ne $sockOld}" + } + set socketPlayCmd($state(socketinfo)) [list ReplayIfClose Wready {} $socketPhQueue($sockOld)] + set socketPhQueue($sockOld) {} + } + if {[string range $result 0 20] eq {proxy connect failed:}} { + # - The HTTPS proxy did not create a socket. The pre-existing value + # (a "placeholder socket") is unchanged. + # - The proxy returned a valid HTTP response to the failed CONNECT + # request, and http::SecureProxyConnect copied this to $token, + # and also set ${token}(connection) set to "close". + # - Remove the error message $result so that Finish delivers this + # HTTP response to the caller. + set result {} + } + Finish $token $result + # Because socket creation failed, the placeholder "socket" must be + # "closed" and (if persistent) removed from the persistent sockets + # table. In the {proxy connect failed:} case Finish does this because + # the value of ${token}(connection) is "close". In the other cases here, + # it does so because $result is non-empty. + } + ##Log Leaving http::OpenSocket coroutine [info coroutine] - token $token + return +} + + +# ------------------------------------------------------------------------------ +# Proc ::http::ConfigureNewSocket +# ------------------------------------------------------------------------------ +# Command to initialise a newly-created socket. Called only from OpenSocket. +# +# This command is called by OpenSocket whenever a genuine socket (sockNew) has +# been opened for for use by HTTP. It does two things: +# (1) If $token uses a placeholder socket, this command replaces the placeholder +# socket with the real socket, not only in $token but in all other requests +# that use the same placeholder. +# (2) It calls ScheduleRequest to schedule each request that uses the socket. +# +# +# Value of sockOld/sockNew can be "sock" (genuine socket) or "ph" (placeholder). +# sockNew is ${token}(sock) +# sockOld sockNew CASES +# sock sock (if $reusing, and sockOld is sock) +# ph sock (if (not $reusing), and sockOld is ph) +# ph ph (if $reusing, and sockOld is ph) - not called in this case +# sock ph (cannot occur unless a bug) - not called in this case +# (if (not $reusing), and sockOld is sock) - illogical +# +# Arguments: +# token - connection token (name of an array) +# sockOld - handle or placeholder used for a socket before the call to +# OpenSocket +# DoLater - dictionary of boolean values listing unfinished tasks +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::ConfigureNewSocket {token sockOld DoLater} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + set reusing $state(reusing) + set sock $state(sock) + set proxyUsed $state(proxyUsed) + ##Log " ConfigureNewSocket" $token $sockOld ... -- $reusing $sock $proxyUsed + + if {(!$reusing) && ($sock ne $sockOld)} { + # Replace the placeholder value sockOld with sock. + + if { [info exists socketMapping($state(socketinfo))] + && ($socketMapping($state(socketinfo)) eq $sockOld) + } { + set socketMapping($state(socketinfo)) $sock + set socketProxyId($state(socketinfo)) $proxyUsed + # tokens that use the placeholder $sockOld are updated below. + ##Log set socketMapping($state(socketinfo)) $sock + } + + # Now finish any tasks left over from PreparePersistentConnection on + # the connection. + # + # The "unset" traces are fired by init (clears entire arrays), and + # by http::Unset. + # Unset is called by CloseQueuedQueries and (possibly never) by geturl. + # + # CancelReadPipeline, CancelWritePipeline call http::Finish for each + # token. + # + # FIXME If Finish is placeholder-aware, these traces can be set earlier, + # in PreparePersistentConnection. + + if {[dict get $DoLater -traceread]} { + set varName ::http::socketRdState($state(socketinfo)) + trace add variable $varName unset ::http::CancelReadPipeline + } + if {[dict get $DoLater -tracewrite]} { + set varName ::http::socketWrState($state(socketinfo)) + trace add variable $varName unset ::http::CancelWritePipeline + } + } + + # Do this in all cases. + ScheduleRequest $token + + # Now look at all other tokens that use the placeholder $sockOld. + if { (!$reusing) + && ($sock ne $sockOld) + && [info exists socketPhQueue($sockOld)] + } { + ##Log " ConfigureNewSocket" $token scheduled, now do $socketPhQueue($sockOld) + foreach tok $socketPhQueue($sockOld) { + # 1. Amend the token's (sock). + ##Log set ${tok}(sock) $sock + set ${tok}(sock) $sock + set ${tok}(proxyUsed) $proxyUsed + + # 2. Schedule the token's HTTP request. + # Every token in socketPhQueue(*) has reusing 1 alreadyQueued 0. + set ${tok}(reusing) 1 + set ${tok}(alreadyQueued) 0 + ScheduleRequest $tok + } + set socketPhQueue($sockOld) {} + } + ##Log " ConfigureNewSocket" $token DONE + + return +} + + +# ------------------------------------------------------------------------------ +# The values of array variables socketMapping etc. +# ------------------------------------------------------------------------------ +# connId "$host:$port" +# socketMapping($connId) the handle or placeholder for the socket that is used +# for "-keepalive 1" requests to $connId. +# socketRdState($connId) the token that is currently reading from the socket. +# Other values: Rready (ready for next token to read). +# socketWrState($connId) the token that is currently writing to the socket. +# Other values: Wready (ready for next token to write), +# peNding (would be ready for next write, except that +# the integrity of a non-pipelined transaction requires +# waiting until the read(s) in progress are finished). +# socketRdQueue($connId) List of tokens that are queued for reading later. +# socketWrQueue($connId) List of tokens that are queued for writing later. +# socketPhQueue($sock) List of tokens that are queued to use a placeholder +# socket, when the real socket has not yet been created. +# socketClosing($connId) (boolean) true iff a server response header indicates +# that the server will close the connection at the end of +# the current response. +# socketPlayCmd($connId) The command to execute to replay pending and +# part-completed transactions if the socket closes early. +# socketCoEvent($connId) Identifier for the "after idle" event that will launch +# an OpenSocket coroutine to open or re-use a socket. +# socketProxyId($connId) The type of proxy that this socket uses: values are +# those of state(proxyUsed) i.e. none, HttpProxy, +# SecureProxy, and SecureProxyFailed. +# The value is not used for anything by http, its purpose +# is to set the value of state() for caller information. +# ------------------------------------------------------------------------------ + + +# ------------------------------------------------------------------------------ +# Using socketWrState(*), socketWrQueue(*), socketRdState(*), socketRdQueue(*) +# ------------------------------------------------------------------------------ +# The element socketWrState($connId) has a value which is either the name of +# the token that is permitted to write to the socket, or "Wready" if no +# token is permitted to write. +# +# The code that sets the value to Wready immediately calls +# http::NextPipelinedWrite, which examines socketWrQueue($connId) and +# processes the next request in the queue, if there is one. The value +# Wready is not found when the interpreter is in the event loop unless the +# socket is idle. +# +# The element socketRdState($connId) has a value which is either the name of +# the token that is permitted to read from the socket, or "Rready" if no +# token is permitted to read. +# +# The code that sets the value to Rready then examines +# socketRdQueue($connId) and processes the next request in the queue, if +# there is one. The value Rready is not found when the interpreter is in +# the event loop unless the socket is idle. +# ------------------------------------------------------------------------------ + + +# ------------------------------------------------------------------------------ +# Proc http::ScheduleRequest +# ------------------------------------------------------------------------------ +# Command to either begin the HTTP request, or add it to the appropriate queue. +# Called from two places in ConfigureNewSocket. +# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::ScheduleRequest {token} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + Log >L$tk ScheduleRequest + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + set Unfinished 0 + + set reusing $state(reusing) + set sockNew $state(sock) + + # The "if" tests below: must test against the current values of + # socketWrState, socketRdState, and so the tests must be done here, + # not earlier in PreparePersistentConnection. + + if {$state(alreadyQueued)} { + # The request has been appended to the queue of a persistent socket + # (that is scheduled to close and have its queue replayed). + # + # A write may or may not be in progress. There is no need to set + # socketWrState to prevent another call stealing write access - all + # subsequent calls on this socket will come here because the socket + # will close after the current read, and its + # socketClosing($connId) is 1. + ##Log "HTTP request for token $token is queued" + + } elseif { $reusing + && $state(-pipeline) + && ($socketWrState($state(socketinfo)) ne "Wready") + } { + ##Log "HTTP request for token $token is queued for pipelined use" + lappend socketWrQueue($state(socketinfo)) $token + + } elseif { $reusing + && (!$state(-pipeline)) + && ($socketWrState($state(socketinfo)) ne "Wready") + } { + # A write is queued or in progress. Lappend to the write queue. + ##Log "HTTP request for token $token is queued for nonpipeline use" + lappend socketWrQueue($state(socketinfo)) $token + + } elseif { $reusing + && (!$state(-pipeline)) + && ($socketWrState($state(socketinfo)) eq "Wready") + && ($socketRdState($state(socketinfo)) ne "Rready") + } { + # A read is queued or in progress, but not a write. Cannot start the + # nonpipeline transaction, but must set socketWrState to prevent a + # pipelined request jumping the queue. + ##Log "HTTP request for token $token is queued for nonpipeline use" + #Log re-use nonpipeline, GRANT delayed write access to $token in geturl + set socketWrState($state(socketinfo)) peNding + lappend socketWrQueue($state(socketinfo)) $token + + } else { + if {$reusing && $state(-pipeline)} { + #Log new, init for pipelined, GRANT write access to $token in geturl + # DO NOT grant premature read access to the socket. + # set socketRdState($state(socketinfo)) $token + set socketWrState($state(socketinfo)) $token + } elseif {$reusing} { + # socketWrState is not used by this non-pipelined transaction. + # We cannot leave it as "Wready" because the next call to + # http::geturl with a pipelined transaction would conclude that the + # socket is available for writing. + #Log new, init for nonpipeline, GRANT r/w access to $token in geturl + set socketRdState($state(socketinfo)) $token + set socketWrState($state(socketinfo)) $token + } else { + } + + # Process the request now. + # - Command is not called unless $state(sock) is a real socket handle + # and not a placeholder. + # - All (!$reusing) cases come here. + # - Some $reusing cases come here too if the connection is + # marked as ready. Those $reusing cases are: + # $reusing && ($socketWrState($state(socketinfo)) eq "Wready") && + # EITHER !$pipeline && ($socketRdState($state(socketinfo)) eq "Rready") + # OR $pipeline + # + #Log ---- $state(socketinfo) << conn to $token for HTTP request (a) + ##Log " ScheduleRequest" $token -- fileevent $state(sock) writable for $token + # Connect does its own fconfigure. + + lassign $state(connArgs) proto phost srvurl + + if {[catch { + fileevent $state(sock) writable \ + [list http::Connect $token $proto $phost $srvurl] + } res opts]} { + # The socket no longer exists. + ##Log bug -- socket gone -- $res -- $opts + } + + } + + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::SendHeader +# ------------------------------------------------------------------------------ +# Command to send a request header, and keep a copy in state(requestHeaders) +# for debugging purposes. +# +# Arguments: +# token - connection token (name of an array) +# key - header name +# value - header value +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::SendHeader {token key value} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + lappend state(requestHeaders) [string tolower $key] $value + puts $sock "$key: $value" + return +} + +# http::Connected -- +# +# Callback used when the connection to the HTTP server is actually +# established. +# +# Arguments: +# token State token. +# proto What protocol (http, https, etc.) was used to connect. +# phost Are we using keep-alive? Non-empty if yes. +# srvurl Service-local URL that we're requesting +# Results: +# None. + +proc http::Connected {token proto phost srvurl} { + variable http + variable urlTypes + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + if {$state(reusing) && (!$state(-pipeline)) && ($state(-timeout) > 0)} { + set state(after) [after $state(-timeout) \ + [list http::reset $token timeout]] + } + + # Set back the variables needed here. + set sock $state(sock) + set isQueryChannel [info exists state(-querychannel)] + set isQuery [info exists state(-query)] + regexp {^(.+):([^:]+)$} $state(socketinfo) {} host port + + set lower [string tolower $proto] + set defport [lindex $urlTypes($lower) 0] + + # Send data in cr-lf format, but accept any line terminators. + # Initialisation to {auto *} now done in geturl, KeepSocket and DoneRequest. + # We are concerned here with the request (write) not the response (read). + lassign [fconfigure $sock -translation] trRead trWrite + fconfigure $sock -translation [list $trRead crlf] \ + -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 + } + + # The following is disallowed in safe interpreters, but the socket is + # already in non-blocking mode in that case. + + catch {fconfigure $sock -blocking off} + set how GET + if {$isQuery} { + set state(querylength) [string length $state(-query)] + if {$state(querylength) > 0} { + set how POST + set contDone 0 + } else { + # There's no query data. + unset state(-query) + set isQuery 0 + } + } elseif {$state(-validate)} { + set how HEAD + } elseif {$isQueryChannel} { + set how POST + # The query channel must be blocking for the async Write to + # work properly. + fconfigure $state(-querychannel) -blocking 1 -translation binary + set contDone 0 + } + if {[info exists state(-method)] && ($state(-method) ne "")} { + set how $state(-method) + } + set accept_types_seen 0 + + Log ^B$tk begin sending request - token $token + + if {[catch { + if {[info exists state(bypass)]} { + set state(method) [lindex [split $state(bypass) { }] 0] + set state(requestHeaders) {} + set state(requestLine) $state(bypass) + } else { + set state(method) $how + set state(requestHeaders) {} + set state(requestLine) "$how $srvurl HTTP/$state(-protocol)" + } + puts $sock $state(requestLine) + set hostValue [GetFieldValue $state(-headers) Host] + if {$hostValue ne {}} { + # Allow Host spoofing. [Bug 928154] + regexp {^[^:]+} $hostValue state(host) + SendHeader $token Host $hostValue + } elseif {$port == $defport} { + # Don't add port in this case, to handle broken servers. [Bug + # #504508] + set state(host) $host + SendHeader $token Host $host + } else { + set state(host) $host + SendHeader $token Host "$host:$port" + } + SendHeader $token User-Agent $http(-useragent) + if {($state(-protocol) > 1.0) && $state(-keepalive)} { + # Send this header, because a 1.1 server is not compelled to treat + # this as the default. + set ConnVal keep-alive + } elseif {($state(-protocol) > 1.0)} { + # RFC2616 sec 8.1.2.1 + set ConnVal close + } else { + # ($state(-protocol) <= 1.0) + # RFC7230 A.1 + # Some server implementations of HTTP/1.0 have a faulty + # implementation of RFC 2068 Keep-Alive. + # Don't leave this to chance. + # For HTTP/1.0 we have already "set state(connection) close" + # and "state(-keepalive) 0". + set ConnVal close + } + # Proxy authorisation (cf. mod by Anders Ramdahl to autoproxy by + # Pat Thoyts). + if {($http(-proxyauth) ne {}) && ($state(proxyUsed) eq {HttpProxy})} { + SendHeader $token Proxy-Authorization $http(-proxyauth) + } + # RFC7230 A.1 - "clients are encouraged not to send the + # Proxy-Connection header field in any requests" + set accept_encoding_seen 0 + set content_type_seen 0 + set connection_seen 0 + foreach {key value} $state(-headers) { + set value [string map [list \n "" \r ""] $value] + set key [string map {" " -} [string trim $key]] + if {[string equal -nocase $key "host"]} { + continue + } + if {[string equal -nocase $key "accept-encoding"]} { + set accept_encoding_seen 1 + } + if {[string equal -nocase $key "accept"]} { + set accept_types_seen 1 + } + if {[string equal -nocase $key "content-type"]} { + set content_type_seen 1 + } + if {[string equal -nocase $key "content-length"]} { + set contDone 1 + set state(querylength) $value + } + if { [string equal -nocase $key "connection"] + && [info exists state(bypass)] + } { + # Value supplied in -headers overrides $ConnVal. + set connection_seen 1 + } elseif {[string equal -nocase $key "connection"]} { + # Remove "close" or "keep-alive" and use our own value. + # In an upgrade request, the upgrade is not guaranteed. + # Value "close" or "keep-alive" tells the server what to do + # if it refuses the upgrade. We send a single "Connection" + # header because some websocket servers, e.g. civetweb, reject + # multiple headers. Bug [d01de3281f] of tcllib/websocket. + set connection_seen 1 + set listVal $state(connectionValues) + if {[set pos [lsearch $listVal close]] != -1} { + set listVal [lreplace $listVal $pos $pos] + } + if {[set pos [lsearch $listVal keep-alive]] != -1} { + set listVal [lreplace $listVal $pos $pos] + } + lappend listVal $ConnVal + set value [join $listVal {, }] + } + if {[string length $key]} { + SendHeader $token $key $value + } + } + # Allow overriding the Accept header on a per-connection basis. Useful + # for working with REST services. [Bug c11a51c482] + if {!$accept_types_seen} { + SendHeader $token Accept $state(accept-types) + } + if { (!$accept_encoding_seen) + && (![info exists state(-handler)]) + && $http(-zip) + } { + SendHeader $token Accept-Encoding gzip,deflate + } elseif {!$accept_encoding_seen} { + SendHeader $token Accept-Encoding identity + } else { + } + if {!$connection_seen} { + SendHeader $token Connection $ConnVal + } + if {$isQueryChannel && ($state(querylength) == 0)} { + # Try to determine size of data in channel. If we cannot seek, the + # surrounding catch will trap us + + set start [tell $state(-querychannel)] + seek $state(-querychannel) 0 end + set state(querylength) \ + [expr {[tell $state(-querychannel)] - $start}] + seek $state(-querychannel) $start + } + + # Note that we don't do Cookie2; that's much nastier and not normally + # observed in practice either. It also doesn't fix the multitude of + # bugs in the basic cookie spec. + if {$http(-cookiejar) ne ""} { + set cookies "" + set separator "" + foreach {key value} [{*}$http(-cookiejar) \ + getCookies $proto $host $state(path)] { + append cookies $separator $key = $value + set separator "; " + } + if {$cookies ne ""} { + SendHeader $token Cookie $cookies + } + } + + # Flush the request header and set up the fileevent that will either + # push the POST data or read the response. + # + # fileevent note: + # + # It is possible to have both the read and write fileevents active at + # this point. The only scenario it seems to affect is a server that + # closes the connection without reading the POST data. (e.g., early + # versions TclHttpd in various error cases). Depending on the + # platform, the client may or may not be able to get the response from + # the server because of the error it will get trying to write the post + # data. Having both fileevents active changes the timing and the + # behavior, but no two platforms (among Solaris, Linux, and NT) behave + # the same, and none behave all that well in any case. Servers should + # always read their POST data if they expect the client to read their + # response. + + if {$isQuery || $isQueryChannel} { + # POST method. + if {!$content_type_seen} { + SendHeader $token Content-Type $state(-type) + } + if {!$contDone} { + SendHeader $token Content-Length $state(querylength) + } + puts $sock "" + flush $sock + # Flush flushes the error in the https case with a bad handshake: + # else the socket never becomes writable again, and hangs until + # timeout (if any). + + lassign [fconfigure $sock -translation] trRead trWrite + fconfigure $sock -translation [list $trRead binary] + fileevent $sock writable [list http::Write $token] + # The http::Write command decides when to make the socket readable, + # using the same test as the GET/HEAD case below. + } else { + # GET or HEAD method. + if { (![catch {fileevent $sock readable} binding]) + && ($binding eq [list http::CheckEof $sock]) + } { + # Remove the "fileevent readable" binding of an idle persistent + # socket to http::CheckEof. We can no longer treat bytes + # received as junk. The server might still time out and + # half-close the socket if it has not yet received the first + # "puts". + fileevent $sock readable {} + } + puts $sock "" + flush $sock + Log ^C$tk end sending request - token $token + # End of writing (GET/HEAD methods). The request has been sent. + + DoneRequest $token + } + + } err]} { + # The socket probably was never connected, OR the connection dropped + # later, OR https handshake error, which may be discovered as late as + # the "flush" command above... + Log "WARNING - if testing, pay special attention to this\ + case (GI) which is seldom executed - token $token" + if {[info exists state(reusing)] && $state(reusing)} { + # The socket was closed at the server end, and closed at + # this end by http::CheckEof. + if {[TestForReplay $token write $err a]} { + return + } else { + Finish $token {failed to re-use socket} + } + + # else: + # This is NOT a persistent socket that has been closed since its + # last use. + # If any other requests are in flight or pipelined/queued, they will + # be discarded. + } elseif {$state(status) eq ""} { + # https handshake errors come here, for + # Tcl 8.7 without http::SecureProxyConnect, and for Tcl 8.6. + set msg [registerError $sock] + registerError $sock {} + if {$msg eq {}} { + set msg {failed to use socket} + } + Finish $token $msg + } elseif {$state(status) ne "error"} { + Finish $token $err + } + } + return +} + +# http::registerError +# +# Called (for example when processing TclTLS activity) to register +# an error for a connection on a specific socket. This helps +# http::Connected to deliver meaningful error messages, e.g. when a TLS +# certificate fails verification. +# +# Usage: http::registerError socket ?newValue? +# +# "set" semantics, except that a "get" (a call without a new value) for a +# non-existent socket returns {}, not an error. + +proc http::registerError {sock args} { + variable registeredErrors + + if { ([llength $args] == 0) + && (![info exists registeredErrors($sock)]) + } { + return + } elseif { ([llength $args] == 1) + && ([lindex $args 0] eq {}) + } { + unset -nocomplain registeredErrors($sock) + return + } + set registeredErrors($sock) {*}$args +} + +# http::DoneRequest -- +# +# Command called when a request has been sent. It will arrange the +# next request and/or response as appropriate. +# +# If this command is called when $socketClosing(*), the request $token +# that calls it must be pipelined and destined to fail. + +proc http::DoneRequest {token} { + variable http + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + + # If pipelined, connect the next HTTP request to the socket. + if {$state(reusing) && $state(-pipeline)} { + # Enable next token (if any) to write. + # The value "Wready" is set only here, and + # in http::Event after reading the response-headers of a + # non-reusing transaction. + # Previous value is $token. It cannot be pending. + set socketWrState($state(socketinfo)) Wready + + # Now ready to write the next pipelined request (if any). + http::NextPipelinedWrite $token + } else { + # If pipelined, this is the first transaction on this socket. We wait + # for the response headers to discover whether the connection is + # persistent. (If this is not done and the connection is not + # persistent, we SHOULD retry and then MUST NOT pipeline before knowing + # that we have a persistent connection + # (rfc2616 8.1.2.2)). + } + + # Connect to receive the response, unless the socket is pipelined + # and another response is being sent. + # This code block is separate from the code below because there are + # cases where socketRdState already has the value $token. + if { $state(-keepalive) + && $state(-pipeline) + && [info exists socketRdState($state(socketinfo))] + && ($socketRdState($state(socketinfo)) eq "Rready") + } { + #Log pipelined, GRANT read access to $token in Connected + set socketRdState($state(socketinfo)) $token + } + + if { $state(-keepalive) + && $state(-pipeline) + && [info exists socketRdState($state(socketinfo))] + && ($socketRdState($state(socketinfo)) ne $token) + } { + # Do not read from the socket until it is ready. + ##Log "HTTP response for token $token is queued for pipelined use" + # If $socketClosing(*), then the caller will be a pipelined write and + # execution will come here. + # This token has already been recorded as "in flight" for writing. + # When the socket is closed, the read queue will be cleared in + # CloseQueuedQueries and so the "lappend" here has no effect. + lappend socketRdQueue($state(socketinfo)) $token + } else { + # In the pipelined case, connection for reading depends on the + # value of socketRdState. + # In the nonpipeline case, connection for reading always occurs. + ReceiveResponse $token + } + return +} + +# http::ReceiveResponse +# +# Connects token to its socket for reading. + +proc http::ReceiveResponse {token} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + + #Log ---- $state(socketinfo) >> conn to $token for HTTP response + lassign [fconfigure $sock -translation] trRead trWrite + fconfigure $sock -translation [list auto $trWrite] \ + -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 + } + Log ^D$tk begin receiving response - token $token + + coroutine ${token}--EventCoroutine http::Event $sock $token + if {[info exists state(-handler)] || [info exists state(-progress)]} { + fileevent $sock readable [list http::EventGateway $sock $token] + } else { + fileevent $sock readable ${token}--EventCoroutine + } + return +} + + +# http::EventGateway +# +# Bug [c2dc1da315]. +# - Recursive launch of the coroutine can occur if a -handler or -progress +# callback is used, and the callback command enters the event loop. +# - To prevent this, the fileevent "binding" is disabled while the +# coroutine is in flight. +# - If a recursive call occurs despite these precautions, it is not +# trapped and discarded here, because it is better to report it as a +# bug. +# - Although this solution is believed to be sufficiently general, it is +# used only if -handler or -progress is specified. In other cases, +# the coroutine is called directly. + +proc http::EventGateway {sock token} { + variable $token + upvar 0 $token state + fileevent $sock readable {} + catch {${token}--EventCoroutine} res opts + if {[info commands ${token}--EventCoroutine] ne {}} { + # The coroutine can be deleted by completion (a non-yield return), by + # http::Finish (when there is a premature end to the transaction), by + # http::reset or http::cleanup, or if the caller set option -channel + # but not option -handler: in the last case reading from the socket is + # now managed by commands ::http::Copy*, http::ReceiveChunked, and + # http::MakeTransformationChunked. + # + # Catch in case the coroutine has closed the socket. + catch {fileevent $sock readable [list http::EventGateway $sock $token]} + } + + # If there was an error, re-throw it. + return -options $opts $res +} + + +# http::NextPipelinedWrite +# +# - Connecting a socket to a token for writing is done by this command and by +# command KeepSocket. +# - If another request has a pipelined write scheduled for $token's socket, +# and if the socket is ready to accept it, connect the write and update +# the queue accordingly. +# - This command is called from http::DoneRequest and http::Event, +# IF $state(-pipeline) AND (the current transfer has reached the point at +# which the socket is ready for the next request to be written). +# - This command is called when a token has write access and is pipelined and +# keep-alive, and sets socketWrState to Wready. +# - The command need not consider the case where socketWrState is set to a token +# that does not yet have write access. Such a token is waiting for Rready, +# and the assignment of the connection to the token will be done elsewhere (in +# http::KeepSocket). +# - This command cannot be called after socketWrState has been set to a +# "pending" token value (that is then overwritten by the caller), because that +# value is set by this command when it is called by an earlier token when it +# relinquishes its write access, and the pending token is always the next in +# line to write. + +proc http::NextPipelinedWrite {token} { + variable http + variable socketRdState + variable socketWrState + variable socketWrQueue + variable socketClosing + variable $token + upvar 0 $token state + set connId $state(socketinfo) + + if { [info exists socketClosing($connId)] + && $socketClosing($connId) + } { + # socketClosing(*) is set because the server has sent a + # "Connection: close" header. + # Behave as if the queues are empty - so do nothing. + } elseif { $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "Wready") + + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && ([set token2 [lindex $socketWrQueue($connId) 0] + set ${token2}(-pipeline) + ] + ) + } { + # - The usual case for a pipelined connection, ready for a new request. + #Log pipelined, GRANT write access to $token2 in NextPipelinedWrite + set conn [set ${token2}(connArgs)] + set socketWrState($connId) $token2 + set socketWrQueue($connId) [lrange $socketWrQueue($connId) 1 end] + # Connect does its own fconfigure. + fileevent $state(sock) writable [list http::Connect $token2 {*}$conn] + #Log ---- $connId << conn to $token2 for HTTP request (b) + + # In the tests below, the next request will be nonpipeline. + } elseif { $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "Wready") + + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && (![ set token3 [lindex $socketWrQueue($connId) 0] + set ${token3}(-pipeline) + ] + ) + + && [info exists socketRdState($connId)] + && ($socketRdState($connId) eq "Rready") + } { + # The case in which the next request will be non-pipelined, and the read + # and write queues is ready: which is the condition for a non-pipelined + # write. + set conn [set ${token3}(connArgs)] + #Log nonpipeline, GRANT r/w access to $token3 in NextPipelinedWrite + set socketRdState($connId) $token3 + set socketWrState($connId) $token3 + set socketWrQueue($connId) [lrange $socketWrQueue($connId) 1 end] + # Connect does its own fconfigure. + fileevent $state(sock) writable [list http::Connect $token3 {*}$conn] + #Log ---- $state(sock) << conn to $token3 for HTTP request (c) + + } elseif { $state(-pipeline) + && [info exists socketWrState($connId)] + && ($socketWrState($connId) eq "Wready") + + && [info exists socketWrQueue($connId)] + && [llength $socketWrQueue($connId)] + && (![set token2 [lindex $socketWrQueue($connId) 0] + set ${token2}(-pipeline) + ] + ) + } { + # - The case in which the next request will be non-pipelined, but the + # read queue is NOT ready. + # - A read is queued or in progress, but not a write. Cannot start the + # nonpipeline transaction, but must set socketWrState to prevent a new + # pipelined request (in http::geturl) jumping the queue. + # - Because socketWrState($connId) is not set to Wready, the assignment + # of the connection to $token2 will be done elsewhere - by command + # http::KeepSocket when $socketRdState($connId) is set to "Rready". + + #Log re-use nonpipeline, GRANT delayed write access to $token in NextP.. + set socketWrState($connId) peNding + } + return +} + +# http::CancelReadPipeline +# +# Cancel pipelined responses on a closing "Keep-Alive" socket. +# +# - Called by a variable trace on "unset socketRdState($connId)". +# - The variable relates to a Keep-Alive socket, which has been closed. +# - Cancels all pipelined responses. The requests have been sent, +# the responses have not yet been received. +# - This is a hard cancel that ends each transaction with error status, +# and closes the connection. Do not use it if you want to replay failed +# transactions. +# - N.B. Always delete ::http::socketRdState($connId) before deleting +# ::http::socketRdQueue($connId), or this command will do nothing. +# +# Arguments +# As for a trace command on a variable. + +proc http::CancelReadPipeline {name1 connId op} { + variable socketRdQueue + ##Log CancelReadPipeline $name1 $connId $op + if {[info exists socketRdQueue($connId)]} { + set msg {the connection was closed by CancelReadPipeline} + foreach token $socketRdQueue($connId) { + set tk [namespace tail $token] + Log ^X$tk end of response "($msg)" - token $token + set ${token}(status) eof + Finish $token ;#$msg + } + set socketRdQueue($connId) {} + } + return +} + +# http::CancelWritePipeline +# +# Cancel queued events on a closing "Keep-Alive" socket. +# +# - Called by a variable trace on "unset socketWrState($connId)". +# - The variable relates to a Keep-Alive socket, which has been closed. +# - In pipelined or nonpipeline case: cancels all queued requests. The +# requests have not yet been sent, the responses are not due. +# - This is a hard cancel that ends each transaction with error status, +# and closes the connection. Do not use it if you want to replay failed +# transactions. +# - N.B. Always delete ::http::socketWrState($connId) before deleting +# ::http::socketWrQueue($connId), or this command will do nothing. +# +# Arguments +# As for a trace command on a variable. + +proc http::CancelWritePipeline {name1 connId op} { + variable socketWrQueue + + ##Log CancelWritePipeline $name1 $connId $op + if {[info exists socketWrQueue($connId)]} { + set msg {the connection was closed by CancelWritePipeline} + foreach token $socketWrQueue($connId) { + set tk [namespace tail $token] + Log ^X$tk end of response "($msg)" - token $token + set ${token}(status) eof + Finish $token ;#$msg + } + set socketWrQueue($connId) {} + } + return +} + +# http::ReplayIfDead -- +# +# - A query on a re-used persistent socket failed at the earliest opportunity, +# because the socket had been closed by the server. Keep the token, tidy up, +# and try to connect on a fresh socket. +# - The connection is monitored for eof by the command http::CheckEof. Thus +# http::ReplayIfDead is needed only when a server event (half-closing an +# apparently idle connection), and a client event (sending a request) occur at +# almost the same time, and neither client nor server detects the other's +# action before performing its own (an "asynchronous close event"). +# - To simplify testing of http::ReplayIfDead, set TEST_EOF 1 in +# http::KeepSocket, and then http::ReplayIfDead will be called if http::geturl +# is called at any time after the server timeout. +# +# Arguments: +# token Connection token. +# +# Side Effects: +# Use the same token, but try to open a new socket. + +proc http::ReplayIfDead {token doing} { + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + + Log running http::ReplayIfDead for $token $doing + + # 1. Merge the tokens for transactions in flight, the read (response) queue, + # and the write (request) queue. + + set InFlightR {} + set InFlightW {} + + # Obtain the tokens for transactions in flight. + if {$state(-pipeline)} { + # Two transactions may be in flight. The "read" transaction was first. + # It is unlikely that the server would close the socket if a response + # was pending; however, an earlier request (as well as the present + # request) may have been sent and ignored if the socket was half-closed + # by the server. + + if { [info exists socketRdState($state(socketinfo))] + && ($socketRdState($state(socketinfo)) ne "Rready") + } { + lappend InFlightR $socketRdState($state(socketinfo)) + } elseif {($doing eq "read")} { + lappend InFlightR $token + } + + if { [info exists socketWrState($state(socketinfo))] + && $socketWrState($state(socketinfo)) ni {Wready peNding} + } { + lappend InFlightW $socketWrState($state(socketinfo)) + } elseif {($doing eq "write")} { + lappend InFlightW $token + } + + # Report any inconsistency of $token with socket*state. + if { ($doing eq "read") + && [info exists socketRdState($state(socketinfo))] + && ($token ne $socketRdState($state(socketinfo))) + } { + Log WARNING - ReplayIfDead pipelined token $token $doing \ + ne socketRdState($state(socketinfo)) \ + $socketRdState($state(socketinfo)) + + } elseif { + ($doing eq "write") + && [info exists socketWrState($state(socketinfo))] + && ($token ne $socketWrState($state(socketinfo))) + } { + Log WARNING - ReplayIfDead pipelined token $token $doing \ + ne socketWrState($state(socketinfo)) \ + $socketWrState($state(socketinfo)) + } + } else { + # One transaction should be in flight. + # socketRdState, socketWrQueue are used. + # socketRdQueue should be empty. + + # Report any inconsistency of $token with socket*state. + if {$token ne $socketRdState($state(socketinfo))} { + Log WARNING - ReplayIfDead nonpipeline token $token $doing \ + ne socketRdState($state(socketinfo)) \ + $socketRdState($state(socketinfo)) + } + + # Report the inconsistency that socketRdQueue is non-empty. + if { [info exists socketRdQueue($state(socketinfo))] + && ($socketRdQueue($state(socketinfo)) ne {}) + } { + Log WARNING - ReplayIfDead nonpipeline token $token $doing \ + has read queue socketRdQueue($state(socketinfo)) \ + $socketRdQueue($state(socketinfo)) ne {} + } + + lappend InFlightW $socketRdState($state(socketinfo)) + set socketRdQueue($state(socketinfo)) {} + } + + set newQueue {} + lappend newQueue {*}$InFlightR + lappend newQueue {*}$socketRdQueue($state(socketinfo)) + lappend newQueue {*}$InFlightW + lappend newQueue {*}$socketWrQueue($state(socketinfo)) + + + # 2. Tidy up token. This is a cut-down form of Finish/CloseSocket. + # Do not change state(status). + # No need to after cancel state(after) - either this is done in + # ReplayCore/ReInit, or Finish is called. + + catch {close $state(sock)} + Unset $state(socketinfo) + + # 2a. Tidy the tokens in the queues - this is done in ReplayCore/ReInit. + # - Transactions, if any, that are awaiting responses cannot be completed. + # They are listed for re-sending in newQueue. + # - All tokens are preserved for re-use by ReplayCore, and their variables + # will be re-initialised by calls to ReInit. + # - The relevant element of socketMapping, socketRdState, socketWrState, + # socketRdQueue, socketWrQueue, socketClosing, socketPlayCmd will be set + # to new values in ReplayCore. + + ReplayCore $newQueue + return +} + +# http::ReplayIfClose -- +# +# A request on a socket that was previously "Connection: keep-alive" has +# received a "Connection: close" response header. The server supplies +# that response correctly, but any later requests already queued on this +# connection will be lost when the socket closes. +# +# This command takes arguments that represent the socketWrState, +# socketRdQueue and socketWrQueue for this connection. The socketRdState +# is not needed because the server responds in full to the request that +# received the "Connection: close" response header. +# +# Existing request tokens $token (::http::$n) are preserved. The caller +# will be unaware that the request was processed this way. + +proc http::ReplayIfClose {Wstate Rqueue Wqueue} { + Log running http::ReplayIfClose for $Wstate $Rqueue $Wqueue + + if {$Wstate in $Rqueue || $Wstate in $Wqueue} { + Log WARNING duplicate token in http::ReplayIfClose - token $Wstate + set Wstate Wready + } + + # 1. Create newQueue + set InFlightW {} + if {$Wstate ni {Wready peNding}} { + lappend InFlightW $Wstate + } + ##Log $Rqueue -- $InFlightW -- $Wqueue + set newQueue {} + lappend newQueue {*}$Rqueue + lappend newQueue {*}$InFlightW + lappend newQueue {*}$Wqueue + + # 2. Cleanup - none needed, done by the caller. + + ReplayCore $newQueue + return +} + +# http::ReInit -- +# +# Command to restore a token's state to a condition that +# makes it ready to replay a request. +# +# Command http::geturl stores extra state in state(tmp*) so +# we don't need to do the argument processing again. +# +# The caller must: +# - Set state(reusing) and state(sock) to their new values after calling +# this command. +# - Unset state(tmpState), state(tmpOpenCmd) if future calls to ReplayCore +# or ReInit are inappropriate for this token. Typically only one retry +# is allowed. +# The caller may also unset state(tmpConnArgs) if this value (and the +# token) will be used immediately. The value is needed by tokens that +# will be stored in a queue. +# +# Arguments: +# token Connection token. +# +# Return Value: (boolean) true iff the re-initialisation was successful. + +proc http::ReInit {token} { + variable $token + upvar 0 $token state + + if {!( + [info exists state(tmpState)] + && [info exists state(tmpOpenCmd)] + && [info exists state(tmpConnArgs)] + ) + } { + Log FAILED in http::ReInit via ReplayCore - NO tmp vars for $token + return 0 + } + + if {[info exists state(after)]} { + after cancel $state(after) + unset state(after) + } + if {[info exists state(socketcoro)]} { + Log $token Cancel socket after-idle event (ReInit) + after cancel $state(socketcoro) + unset state(socketcoro) + } + + # Don't alter state(status) - this would trigger http::wait if it is in use. + set tmpState $state(tmpState) + set tmpOpenCmd $state(tmpOpenCmd) + set tmpConnArgs $state(tmpConnArgs) + foreach name [array names state] { + if {$name ne "status"} { + unset state($name) + } + } + + # Don't alter state(status). + # Restore state(tmp*) - the caller may decide to unset them. + # Restore state(tmpConnArgs) which is needed for connection. + # state(tmpState), state(tmpOpenCmd) are needed only for retries. + + dict unset tmpState status + array set state $tmpState + set state(tmpState) $tmpState + set state(tmpOpenCmd) $tmpOpenCmd + set state(tmpConnArgs) $tmpConnArgs + + return 1 +} + +# http::ReplayCore -- +# +# Command to replay a list of requests, using existing connection tokens. +# +# Abstracted from http::geturl which stores extra state in state(tmp*) so +# we don't need to do the argument processing again. +# +# Arguments: +# newQueue List of connection tokens. +# +# Side Effects: +# Use existing tokens, but try to open a new socket. + +proc http::ReplayCore {newQueue} { + variable TmpSockCounter + + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + if {[llength $newQueue] == 0} { + # Nothing to do. + return + } + + ##Log running ReplayCore for {*}$newQueue + set newToken [lindex $newQueue 0] + set newQueue [lrange $newQueue 1 end] + + # 3. Use newToken, and restore its values of state(*). Do not restore + # elements tmp* - we try again only once. + + set token $newToken + variable $token + upvar 0 $token state + + if {![ReInit $token]} { + Log FAILED in http::ReplayCore - NO tmp vars + Log ReplayCore reject $token + Finish $token {cannot send this request again} + return + } + + set tmpState $state(tmpState) + set tmpOpenCmd $state(tmpOpenCmd) + set tmpConnArgs $state(tmpConnArgs) + unset state(tmpState) + unset state(tmpOpenCmd) + unset state(tmpConnArgs) + + set state(reusing) 0 + set state(ReusingPlaceholder) 0 + set state(alreadyQueued) 0 + Log ReplayCore replay $token + + # Give the socket a placeholder name before it is created. + set sock HTTP_PLACEHOLDER_[incr TmpSockCounter] + set state(sock) $sock + + # Move the $newQueue into the placeholder socket's socketPhQueue. + set socketPhQueue($sock) {} + foreach tok $newQueue { + if {[ReInit $tok]} { + set ${tok}(reusing) 1 + set ${tok}(sock) $sock + lappend socketPhQueue($sock) $tok + Log ReplayCore replay $tok + } else { + Log ReplayCore reject $tok + set ${tok}(reusing) 1 + set ${tok}(sock) NONE + Finish $tok {cannot send this request again} + } + } + + AsyncTransaction $token + + return +} + +# Data access functions: +# Data - the URL data +# Status - the transaction status: ok, reset, eof, timeout, error +# Code - the HTTP transaction code, e.g., 200 +# Size - the size of the URL data + +proc http::responseBody {token} { + variable $token + upvar 0 $token state + return $state(body) +} +proc http::status {token} { + if {![info exists $token]} { + return "error" + } + variable $token + upvar 0 $token state + return $state(status) +} +proc http::responseLine {token} { + variable $token + upvar 0 $token state + return $state(http) +} +proc http::requestLine {token} { + variable $token + upvar 0 $token state + return $state(requestLine) +} +proc http::responseCode {token} { + variable $token + upvar 0 $token state + if {[regexp {[0-9]{3}} $state(http) numeric_code]} { + return $numeric_code + } else { + return $state(http) + } +} +proc http::size {token} { + variable $token + upvar 0 $token state + return $state(currentsize) +} +proc http::requestHeaders {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::requestHeaders token ?headerName?} + } else { + return [Meta $token request {*}$args] + } +} +proc http::responseHeaders {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::responseHeaders token ?headerName?} + } else { + return [Meta $token response {*}$args] + } +} +proc http::requestHeaderValue {token header} { + Meta $token request $header VALUE +} +proc http::responseHeaderValue {token header} { + Meta $token response $header VALUE +} +proc http::Meta {token who args} { + variable $token + upvar 0 $token state + + if {$who eq {request}} { + set whom requestHeaders + } elseif {$who eq {response}} { + set whom meta + } else { + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} + } + + set header [string tolower [lindex $args 0]] + set how [string tolower [lindex $args 1]] + set lenny [llength $args] + if {$lenny == 0} { + return $state($whom) + } elseif {($lenny > 2) || (($lenny == 2) && ($how ne {value}))} { + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} + } else { + set result {} + set combined {} + foreach {key value} $state($whom) { + if {$key eq $header} { + lappend result $key $value + append combined $value {, } + } + } + if {$lenny == 1} { + return $result + } else { + return [string range $combined 0 end-2] + } + } +} + + +# ------------------------------------------------------------------------------ +# Proc http::responseInfo +# ------------------------------------------------------------------------------ +# Command to return a dictionary of the most useful metadata of a HTTP +# response. +# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: a dict. See man page http(n) for a description of each item. +# ------------------------------------------------------------------------------ + +proc http::responseInfo {token} { + variable $token + upvar 0 $token state + set result {} + foreach {key origin name} { + stage STATE state + status STATE status + responseCode STATE responseCode + reasonPhrase STATE reasonPhrase + contentType STATE type + binary STATE binary + redirection RESP location + upgrade STATE upgrade + error ERROR - + postError STATE posterror + method STATE method + charset STATE charset + compression STATE coding + httpRequest STATE -protocol + httpResponse STATE httpResponse + url STATE url + connectionRequest REQ connection + connectionResponse RESP connection + connectionActual STATE connection + transferEncoding STATE transfer + totalPost STATE querylength + currentPost STATE queryoffset + totalSize STATE totalsize + currentSize STATE currentsize + proxyUsed STATE proxyUsed + } { + if {$origin eq {STATE}} { + if {[info exists state($name)]} { + dict set result $key $state($name) + } else { + # Should never come here + dict set result $key {} + } + } elseif {$origin eq {REQ}} { + dict set result $key [requestHeaderValue $token $name] + } elseif {$origin eq {RESP}} { + dict set result $key [responseHeaderValue $token $name] + } elseif {$origin eq {ERROR}} { + # Don't flood the dict with data. The command ::http::error is + # available. + if {[info exists state(error)]} { + set msg [lindex $state(error) 0] + } else { + set msg {} + } + dict set result $key $msg + } else { + # Should never come here + dict set result $key {} + } + } + return $result +} +proc http::error {token} { + variable $token + upvar 0 $token state + if {[info exists state(error)]} { + return $state(error) + } + return +} +proc http::postError {token} { + variable $token + upvar 0 $token state + if {[info exists state(postErrorFull)]} { + return $state(postErrorFull) + } + return +} + +# http::cleanup +# +# Garbage collect the state associated with a transaction +# +# Arguments +# token The token returned from http::geturl +# +# Side Effects +# Unsets the state array. + +proc http::cleanup {token} { + variable $token + upvar 0 $token state + if {[info commands ${token}--EventCoroutine] ne {}} { + rename ${token}--EventCoroutine {} + } + if {[info commands ${token}--SocketCoroutine] ne {}} { + rename ${token}--SocketCoroutine {} + } + if {[info exists state(after)]} { + after cancel $state(after) + unset state(after) + } + if {[info exists state(socketcoro)]} { + Log $token Cancel socket after-idle event (cleanup) + after cancel $state(socketcoro) + unset state(socketcoro) + } + if {[info exists state]} { + unset state + } + return +} + +# http::Connect +# +# This callback is made when an asynchronous connection completes. +# +# Arguments +# token The token returned from http::geturl +# +# Side Effects +# Sets the status of the connection, which unblocks +# the waiting geturl call + +proc http::Connect {token proto phost srvurl} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + + if {[catch {eof $state(sock)} tmp] || $tmp} { + set err "due to unexpected EOF" + } elseif {[set err [fconfigure $state(sock) -error]] ne ""} { + # set err is done in test + } else { + # All OK + set state(state) connecting + fileevent $state(sock) writable {} + ::http::Connected $token $proto $phost $srvurl + return + } + + # Error cases. + Log "WARNING - if testing, pay special attention to this\ + case (GJ) which is seldom executed - token $token" + if {[info exists state(reusing)] && $state(reusing)} { + # The socket was closed at the server end, and closed at + # this end by http::CheckEof. + if {[TestForReplay $token write $err b]} { + return + } + + # else: + # This is NOT a persistent socket that has been closed since its + # last use. + # If any other requests are in flight or pipelined/queued, they will + # be discarded. + } + Finish $token "connect failed: $err" + return +} + +# http::Write +# +# Write POST query data to the socket +# +# Arguments +# token The token for the connection +# +# Side Effects +# Write the socket and handle callbacks. + +proc http::Write {token} { + variable http + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + + # Output a block. Tcl will buffer this if the socket blocks + set done 0 + if {[catch { + # Catch I/O errors on dead sockets + + if {[info exists state(-query)]} { + # Chop up large query strings so queryprogress callback can give + # smooth feedback. + if { $state(queryoffset) + $state(-queryblocksize) + >= $state(querylength) + } { + # This will be the last puts for the request-body. + if { (![catch {fileevent $sock readable} binding]) + && ($binding eq [list http::CheckEof $sock]) + } { + # Remove the "fileevent readable" binding of an idle + # persistent socket to http::CheckEof. We can no longer + # treat bytes received as junk. The server might still time + # out and half-close the socket if it has not yet received + # the first "puts". + fileevent $sock readable {} + } + } + puts -nonewline $sock \ + [string range $state(-query) $state(queryoffset) \ + [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] + incr state(queryoffset) $state(-queryblocksize) + if {$state(queryoffset) >= $state(querylength)} { + set state(queryoffset) $state(querylength) + set done 1 + } + } else { + # Copy blocks from the query channel + + set outStr [read $state(-querychannel) $state(-queryblocksize)] + if {[eof $state(-querychannel)]} { + # This will be the last puts for the request-body. + if { (![catch {fileevent $sock readable} binding]) + && ($binding eq [list http::CheckEof $sock]) + } { + # Remove the "fileevent readable" binding of an idle + # persistent socket to http::CheckEof. We can no longer + # treat bytes received as junk. The server might still time + # out and half-close the socket if it has not yet received + # the first "puts". + fileevent $sock readable {} + } + } + puts -nonewline $sock $outStr + incr state(queryoffset) [string length $outStr] + if {[eof $state(-querychannel)]} { + set done 1 + } + } + } err opts]} { + # Do not call Finish here, but instead let the read half of the socket + # process whatever server reply there is to get. + set state(posterror) $err + set info [dict get $opts -errorinfo] + set code [dict get $opts -code] + set state(postErrorFull) [list $err $info $code] + set done 1 + } + + if {$done} { + catch {flush $sock} + fileevent $sock writable {} + Log ^C$tk end sending request - token $token + # End of writing (POST method). The request has been sent. + + DoneRequest $token + } + + # Callback to the client after we've completely handled everything. + + if {[string length $state(-queryprogress)]} { + namespace eval :: $state(-queryprogress) \ + [list $token $state(querylength) $state(queryoffset)] + } + return +} + +# http::Event +# +# Handle input on the socket. This command is the core of +# the coroutine commands ${token}--EventCoroutine that are +# bound to "fileevent $sock readable" and process input. +# +# Arguments +# sock The socket receiving input. +# token The token returned from http::geturl +# +# Side Effects +# Read the socket and handle callbacks. + +proc http::Event {sock token} { + variable http + variable socketMapping + variable socketRdState + variable socketWrState + variable socketRdQueue + variable socketWrQueue + variable socketPhQueue + variable socketClosing + variable socketPlayCmd + variable socketCoEvent + variable socketProxyId + + variable $token + upvar 0 $token state + set tk [namespace tail $token] + while 1 { + yield + ##Log Event call - token $token + + if {![info exists state]} { + Log "Event $sock with invalid token '$token' - remote close?" + if {!([catch {eof $sock} tmp] || $tmp)} { + if {[set d [read $sock]] ne ""} { + Log "WARNING: additional data left on closed socket\ + - token $token" + } else { + } + } else { + } + Log ^X$tk end of response (token error) - token $token + CloseSocket $sock + return + } else { + } + if {$state(state) eq "connecting"} { + ##Log - connecting - token $token + if { $state(reusing) + && $state(-pipeline) + && ($state(-timeout) > 0) + && (![info exists state(after)]) + } { + set state(after) [after $state(-timeout) \ + [list http::reset $token timeout]] + } else { + } + + if {[catch {gets $sock state(http)} nsl]} { + Log "WARNING - if testing, pay special attention to this\ + case (GK) which is seldom executed - token $token" + if {[info exists state(reusing)] && $state(reusing)} { + # The socket was closed at the server end, and closed at + # this end by http::CheckEof. + + if {[TestForReplay $token read $nsl c]} { + return + } else { + } + # else: + # This is NOT a persistent socket that has been closed since + # its last use. + # If any other requests are in flight or pipelined/queued, + # they will be discarded. + } else { + # https handshake errors come here, for + # Tcl 8.7 with http::SecureProxyConnect. + set msg [registerError $sock] + registerError $sock {} + if {$msg eq {}} { + set msg $nsl + } + Log ^X$tk end of response (error) - token $token + Finish $token $msg + return + } + } elseif {$nsl >= 0} { + ##Log - connecting 1 - token $token + set state(state) "header" + } elseif { ([catch {eof $sock} tmp] || $tmp) + && [info exists state(reusing)] + && $state(reusing) + } { + # The socket was closed at the server end, and we didn't notice. + # This is the first read - where the closure is usually first + # detected. + + if {[TestForReplay $token read {} d]} { + return + } else { + } + + # else: + # This is NOT a persistent socket that has been closed since its + # last use. + # If any other requests are in flight or pipelined/queued, they + # will be discarded. + } else { + } + } elseif {$state(state) eq "header"} { + if {[catch {gets $sock line} nhl]} { + ##Log header failed - token $token + Log ^X$tk end of response (error) - token $token + Finish $token $nhl + return + } elseif {$nhl == 0} { + ##Log header done - token $token + Log ^E$tk end of response headers - token $token + # We have now read all headers + # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 + if { ($state(http) == "") + || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100) + } { + set state(state) "connecting" + continue + # This was a "return" in the pre-coroutine code. + } else { + } + + # We have $state(http) so let's split it into its components. + if {[regexp {^HTTP/(\S+) ([0-9]{3}) (.*)$} $state(http) \ + -> httpResponse responseCode reasonPhrase] + } { + set state(httpResponse) $httpResponse + set state(responseCode) $responseCode + set state(reasonPhrase) $reasonPhrase + } else { + set state(httpResponse) $state(http) + set state(responseCode) $state(http) + set state(reasonPhrase) $state(http) + } + + if { ([info exists state(connection)]) + && ([info exists socketMapping($state(socketinfo))]) + && ("keep-alive" in $state(connection)) + && ($state(-keepalive)) + && (!$state(reusing)) + && ($state(-pipeline)) + } { + # Response headers received for first request on a + # persistent socket. Now ready for pipelined writes (if + # any). + # Previous value is $token. It cannot be "pending". + set socketWrState($state(socketinfo)) Wready + http::NextPipelinedWrite $token + } else { + } + + # Once a "close" has been signaled, the client MUST NOT send any + # more requests on that connection. + # + # If either the client or the server sends the "close" token in + # the Connection header, that request becomes the last one for + # the connection. + + if { ([info exists state(connection)]) + && ([info exists socketMapping($state(socketinfo))]) + && ("close" in $state(connection)) + && ($state(-keepalive)) + } { + # The server warns that it will close the socket after this + # response. + ##Log WARNING - socket will close after response for $token + # Prepare data for a call to ReplayIfClose. + Log $token socket will close after this transaction + # 1. Cancel socket-assignment coro events that have not yet + # launched, and add the tokens to the write queue. + if {[info exists socketCoEvent($state(socketinfo))]} { + foreach {tok can} $socketCoEvent($state(socketinfo)) { + lappend socketWrQueue($state(socketinfo)) $tok + unset -nocomplain ${tok}(socketcoro) + after cancel $can + Log $tok Cancel socket after-idle event (Event) + Log Move $tok from socketCoEvent to socketWrQueue and cancel its after idle coro + } + set socketCoEvent($state(socketinfo)) {} + } else { + } + + if { ($socketRdQueue($state(socketinfo)) ne {}) + || ($socketWrQueue($state(socketinfo)) ne {}) + || ($socketWrState($state(socketinfo)) ni + [list Wready peNding $token]) + } { + set InFlightW $socketWrState($state(socketinfo)) + if {$InFlightW in [list Wready peNding $token]} { + set InFlightW Wready + } else { + set msg "token ${InFlightW} is InFlightW" + ##Log $msg - token $token + } + set socketPlayCmd($state(socketinfo)) \ + [list ReplayIfClose $InFlightW \ + $socketRdQueue($state(socketinfo)) \ + $socketWrQueue($state(socketinfo))] + + # - All tokens are preserved for re-use by ReplayCore. + # - Queues are preserved in case of Finish with error, + # but are not used for anything else because + # socketClosing(*) is set below. + # - Cancel the state(after) timeout events. + foreach tokenVal $socketRdQueue($state(socketinfo)) { + if {[info exists ${tokenVal}(after)]} { + after cancel [set ${tokenVal}(after)] + unset ${tokenVal}(after) + } else { + } + # Tokens in the read queue have no (socketcoro) to + # cancel. + } + } else { + set socketPlayCmd($state(socketinfo)) \ + {ReplayIfClose Wready {} {}} + } + + # Do not allow further connections on this socket (but + # geturl can add new requests to the replay). + set socketClosing($state(socketinfo)) 1 + } else { + } + + set state(state) body + + # According to + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection + # any comma-separated "Connection:" list implies keep-alive, but I + # don't see this in the RFC so we'll play safe and + # scan any list for "close". + # Done here to support combining duplicate header field's values. + if { [info exists state(connection)] + && ("close" ni $state(connection)) + && ("keep-alive" ni $state(connection)) + } { + lappend state(connection) "keep-alive" + } else { + } + + # If doing a HEAD, then we won't get any body + if {$state(-validate)} { + Log ^F$tk end of response for HEAD request - token $token + set state(state) complete + Eot $token + return + } elseif { + ($state(method) eq {CONNECT}) + && [string is integer -strict $state(responseCode)] + && ($state(responseCode) >= 200) + && ($state(responseCode) < 300) + } { + # A successful CONNECT response has no body. + # (An unsuccessful CONNECT has headers and body.) + # The code below is abstracted from Eot/Finish, but + # keeps the socket open. + catch {fileevent $state(sock) readable {}} + catch {fileevent $state(sock) writable {}} + set state(state) complete + set state(status) ok + if {[info commands ${token}--EventCoroutine] ne {}} { + rename ${token}--EventCoroutine {} + } + if {[info commands ${token}--SocketCoroutine] ne {}} { + rename ${token}--SocketCoroutine {} + } + if {[info exists state(socketcoro)]} { + Log $token Cancel socket after-idle event (Finish) + after cancel $state(socketcoro) + unset state(socketcoro) + } + if {[info exists state(after)]} { + after cancel $state(after) + unset state(after) + } + if { [info exists state(-command)] + && (![info exists state(done-command-cb)]) + } { + set state(done-command-cb) yes + if {[catch {namespace eval :: $state(-command) $token} err]} { + set state(error) [list $err $errorInfo $errorCode] + set state(status) error + } + } + return + } else { + } + + # - For non-chunked transfer we may have no body - in this case + # we may get no further file event if the connection doesn't + # close and no more data is sent. We can tell and must finish + # up now - not later - the alternative would be to wait until + # the server times out. + # - In this case, the server has NOT told the client it will + # close the connection, AND it has NOT indicated the resource + # length EITHER by setting the Content-Length (totalsize) OR + # by using chunked Transfer-Encoding. + # - Do not worry here about the case (Connection: close) because + # the server should close the connection. + # - IF (NOT Connection: close) AND (NOT chunked encoding) AND + # (totalsize == 0). + + if { (!( [info exists state(connection)] + && ("close" in $state(connection)) + ) + ) + && ($state(transfer) eq {}) + && ($state(totalsize) == 0) + } { + set msg {body size is 0 and no events likely - complete} + Log "$msg - token $token" + set msg {(length unknown, set to 0)} + Log ^F$tk end of response body {*}$msg - token $token + set state(state) complete + Eot $token + return + } else { + } + + # We have to use binary translation to count bytes properly. + lassign [fconfigure $sock -translation] trRead trWrite + fconfigure $sock -translation [list binary $trWrite] + + if { + $state(-binary) || [IsBinaryContentType $state(type)] + } { + # Turn off conversions for non-text data. + set state(binary) 1 + } else { + } + if {[info exists state(-channel)]} { + if {$state(binary) || [llength [ContentEncoding $token]]} { + fconfigure $state(-channel) -translation binary + } else { + } + if {![info exists state(-handler)]} { + # Initiate a sequence of background fcopies. + fileevent $sock readable {} + rename ${token}--EventCoroutine {} + CopyStart $sock $token + return + } else { + } + } else { + } + } elseif {$nhl > 0} { + # Process header lines. + ##Log header - token $token - $line + if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { + set key [string tolower $key] + switch -- $key { + content-type { + set state(type) [string trim [string tolower $value]] + # Grab the optional charset information. + if {[regexp -nocase \ + {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \ + $state(type) -> cs]} { + set state(charset) [string map {{\"} \"} $cs] + } else { + regexp -nocase {charset\s*=\s*(\S+?);?} \ + $state(type) -> state(charset) + } + } + content-length { + set state(totalsize) [string trim $value] + } + content-encoding { + set state(coding) [string trim $value] + } + transfer-encoding { + set state(transfer) \ + [string trim [string tolower $value]] + } + proxy-connection - + connection { + # RFC 7230 Section 6.1 states that a comma-separated + # list is an acceptable value. + if {![info exists state(connectionRespFlag)]} { + # This is the first "Connection" response header. + # Scrub the earlier value set by iniitialisation. + set state(connectionRespFlag) {} + set state(connection) {} + } + foreach el [SplitCommaSeparatedFieldValue $value] { + lappend state(connection) [string tolower $el] + } + } + upgrade { + set state(upgrade) [string trim $value] + } + set-cookie { + if {$http(-cookiejar) ne ""} { + ParseCookie $token [string trim $value] + } else { + } + } + } + lappend state(meta) $key [string trim $value] + } else { + } + } else { + } + } else { + # Now reading body + ##Log body - token $token + if {[catch { + if {[info exists state(-handler)]} { + set n [namespace eval :: $state(-handler) [list $sock $token]] + ##Log handler $n - token $token + # N.B. the protocol has been set to 1.0 because the -handler + # logic is not expected to handle chunked encoding. + # FIXME Allow -handler with 1.1 on dechunked stacked chan. + if {$state(totalsize) == 0} { + # We know the transfer is complete only when the server + # closes the connection - i.e. eof is not an error. + set state(state) complete + } else { + } + if {![string is integer -strict $n]} { + if 1 { + # Do not tolerate bad -handler - fail with error + # status. + set msg {the -handler command for http::geturl must\ + return an integer (the number of bytes\ + read)} + Log ^X$tk end of response (handler error) -\ + token $token + Eot $token $msg + } else { + # Tolerate the bad -handler, and continue. The + # penalty: + # (a) Because the handler returns nonsense, we know + # the transfer is complete only when the server + # closes the connection - i.e. eof is not an + # error. + # (b) http::size will not be accurate. + # (c) The transaction is already downgraded to 1.0 + # to avoid chunked transfer encoding. It MUST + # also be forced to "Connection: close" or the + # HTTP/1.0 equivalent; or it MUST fail (as + # above) if the server sends + # "Connection: keep-alive" or the HTTP/1.0 + # equivalent. + set n 0 + set state(state) complete + } + } else { + } + } elseif {[info exists state(transfer_final)]} { + # This code forgives EOF in place of the final CRLF. + set line [GetTextLine $sock] + set n [string length $line] + set state(state) complete + if {$n > 0} { + # - HTTP trailers (late response headers) are permitted + # by Chunked Transfer-Encoding, and can be safely + # ignored. + # - Do not count these bytes in the total received for + # the response body. + Log "trailer of $n bytes after final chunk -\ + token $token" + append state(transfer_final) $line + set n 0 + } else { + Log ^F$tk end of response body (chunked) - token $token + Log "final chunk part - token $token" + Eot $token + } + } elseif { [info exists state(transfer)] + && ($state(transfer) eq "chunked") + } { + ##Log chunked - token $token + set size 0 + set hexLenChunk [GetTextLine $sock] + #set ntl [string length $hexLenChunk] + if {[string trim $hexLenChunk] ne ""} { + scan $hexLenChunk %x size + if {$size != 0} { + ##Log chunk-measure $size - token $token + set chunk [BlockingRead $sock $size] + set n [string length $chunk] + if {$n >= 0} { + append state(body) $chunk + incr state(log_size) [string length $chunk] + ##Log chunk $n cumul $state(log_size) -\ + token $token + } else { + } + if {$size != [string length $chunk]} { + Log "WARNING: mis-sized chunk:\ + was [string length $chunk], should be\ + $size - token $token" + set n 0 + set state(connection) close + Log ^X$tk end of response (chunk error) \ + - token $token + set msg {error in chunked encoding - fetch\ + terminated} + Eot $token $msg + } else { + } + # CRLF that follows chunk. + # If eof, this is handled at the end of this proc. + GetTextLine $sock + } else { + set n 0 + set state(transfer_final) {} + } + } else { + # Line expected to hold chunk length is empty, or eof. + ##Log bad-chunk-measure - token $token + set n 0 + set state(connection) close + Log ^X$tk end of response (chunk error) - token $token + Eot $token {error in chunked encoding -\ + fetch terminated} + } + } else { + ##Log unchunked - token $token + if {$state(totalsize) == 0} { + # We know the transfer is complete only when the server + # closes the connection. + set state(state) complete + set reqSize $state(-blocksize) + } else { + # Ask for the whole of the unserved response-body. + # This works around a problem with a tls::socket - for + # https in keep-alive mode, and a request for + # $state(-blocksize) bytes, the last part of the + # resource does not get read until the server times out. + set reqSize [expr { $state(totalsize) + - $state(currentsize)}] + + # The workaround fails if reqSize is + # capped at $state(-blocksize). + # set reqSize [expr {min($reqSize, $state(-blocksize))}] + } + set c $state(currentsize) + set t $state(totalsize) + ##Log non-chunk currentsize $c of totalsize $t -\ + token $token + set block [read $sock $reqSize] + set n [string length $block] + if {$n >= 0} { + append state(body) $block + ##Log non-chunk [string length $state(body)] -\ + token $token + } else { + } + } + # This calculation uses n from the -handler, chunked, or + # unchunked case as appropriate. + if {[info exists state]} { + if {$n >= 0} { + incr state(currentsize) $n + set c $state(currentsize) + set t $state(totalsize) + ##Log another $n currentsize $c totalsize $t -\ + token $token + } else { + } + # If Content-Length - check for end of data. + if { + ($state(totalsize) > 0) + && ($state(currentsize) >= $state(totalsize)) + } { + Log ^F$tk end of response body (unchunked) -\ + token $token + set state(state) complete + Eot $token + } else { + } + } else { + } + } err]} { + Log ^X$tk end of response (error ${err}) - token $token + Finish $token $err + return + } else { + if {[info exists state(-progress)]} { + namespace eval :: $state(-progress) \ + [list $token $state(totalsize) $state(currentsize)] + } else { + } + } + } + + # catch as an Eot above may have closed the socket already + # $state(state) may be connecting, header, body, or complete + if {(![catch {eof $sock} eof]) && $eof} { + # [eof sock] succeeded and the result was 1 + ##Log eof - token $token + if {[info exists $token]} { + set state(connection) close + if {$state(state) eq "complete"} { + # This includes all cases in which the transaction + # can be completed by eof. + # The value "complete" is set only in http::Event, and it is + # used only in the test above. + Log ^F$tk end of response body (unchunked, eof) -\ + token $token + Eot $token + } else { + # Premature eof. + Log ^X$tk end of response (unexpected eof) - token $token + Eot $token eof + } + } else { + # open connection closed on a token that has been cleaned up. + Log ^X$tk end of response (token error) - token $token + CloseSocket $sock + } + } else { + # EITHER [eof sock] failed - presumed done by Eot + # OR [eof sock] succeeded and the result was 0 + } + } + return +} + +# http::TestForReplay +# +# Command called if eof is discovered when a socket is first used for a +# new transaction. Typically this occurs if a persistent socket is used +# after a period of idleness and the server has half-closed the socket. +# +# token - the connection token returned by http::geturl +# doing - "read" or "write" +# err - error message, if any +# caller - code to identify the caller - used only in logging +# +# Return Value: boolean, true iff the command calls http::ReplayIfDead. + +proc http::TestForReplay {token doing err caller} { + variable http + variable $token + upvar 0 $token state + set tk [namespace tail $token] + if {$doing eq "read"} { + set code Q + set action response + set ing reading + } else { + set code P + set action request + set ing writing + } + + if {$err eq {}} { + set err "detect eof when $ing (server timed out?)" + } + + if {$state(method) eq "POST" && !$http(-repost)} { + # No Replay. + # The present transaction will end when Finish is called. + # That call to Finish will abort any other transactions + # currently in the write queue. + # For calls from http::Event this occurs when execution + # reaches the code block at the end of that proc. + set msg {no retry for POST with http::config -repost 0} + Log reusing socket failed "($caller)" - $msg - token $token + Log error - $err - token $token + Log ^X$tk end of $action (error) - token $token + return 0 + } else { + # Replay. + set msg {try a new socket} + Log reusing socket failed "($caller)" - $msg - token $token + Log error - $err - token $token + Log ^$code$tk Any unfinished (incl this one) failed - token $token + ReplayIfDead $token $doing + return 1 + } +} + +# http::IsBinaryContentType -- +# +# Determine if the content-type means that we should definitely transfer +# the data as binary. [Bug 838e99a76d] +# +# Arguments +# type The content-type of the data. +# +# Results: +# Boolean, true if we definitely should be binary. + +proc http::IsBinaryContentType {type} { + lassign [split [string tolower $type] "/;"] major minor + if {$major eq "text"} { + return false + } + # There's a bunch of XML-as-application-format things about. See RFC 3023 + # and so on. + if {$major eq "application"} { + set minor [string trimright $minor] + if {$minor in {"json" "xml" "xml-external-parsed-entity" "xml-dtd"}} { + return false + } + } + # Not just application/foobar+xml but also image/svg+xml, so let us not + # restrict things for now... + if {[string match "*+xml" $minor]} { + return false + } + return true +} + +proc http::ParseCookie {token value} { + variable http + variable CookieRE + variable $token + upvar 0 $token state + + if {![regexp $CookieRE $value -> cookiename cookieval opts]} { + # Bad cookie! No biscuit! + return + } + + # Convert the options into a list before feeding into the cookie store; + # ugly, but quite easy. + set realopts {hostonly 1 path / secure 0 httponly 0} + dict set realopts origin $state(host) + dict set realopts domain $state(host) + foreach option [split [regsub -all {;\s+} $opts \u0000] \u0000] { + regexp {^(.*?)(?:=(.*))?$} $option -> optname optval + switch -exact -- [string tolower $optname] { + expires { + if {[catch { + #Sun, 06 Nov 1994 08:49:37 GMT + dict set realopts expires \ + [clock scan $optval -format "%a, %d %b %Y %T %Z"] + }] && [catch { + # Google does this one + #Mon, 01-Jan-1990 00:00:00 GMT + dict set realopts expires \ + [clock scan $optval -format "%a, %d-%b-%Y %T %Z"] + }] && [catch { + # This is in the RFC, but it is also in the original + # Netscape cookie spec, now online at: + # + #Sunday, 06-Nov-94 08:49:37 GMT + dict set realopts expires \ + [clock scan $optval -format "%A, %d-%b-%y %T %Z"] + }]} {catch { + #Sun Nov 6 08:49:37 1994 + dict set realopts expires \ + [clock scan $optval -gmt 1 -format "%a %b %d %T %Y"] + }} + } + max-age { + # Normalize + if {[string is integer -strict $optval]} { + dict set realopts expires [expr {[clock seconds] + $optval}] + } + } + domain { + # From the domain-matches definition [RFC 2109, section 2]: + # Host A's name domain-matches host B's if [...] + # A is a FQDN string and has the form NB, where N is a + # non-empty name string, B has the form .B', and B' is a + # FQDN string. (So, x.y.com domain-matches .y.com but + # not y.com.) + if {$optval ne "" && ![string match *. $optval]} { + dict set realopts domain [string trimleft $optval "."] + dict set realopts hostonly [expr { + ! [string match .* $optval] + }] + } + } + path { + if {[string match /* $optval]} { + dict set realopts path $optval + } + } + secure - httponly { + dict set realopts [string tolower $optname] 1 + } + } + } + dict set realopts key $cookiename + dict set realopts value $cookieval + {*}$http(-cookiejar) storeCookie $realopts +} + +# http::GetTextLine -- +# +# Get one line with the stream in crlf mode. +# Used if Transfer-Encoding is chunked, to read the line that +# reports the size of the following chunk. +# Empty line is not distinguished from eof. The caller must +# be able to handle this. +# +# Arguments +# sock The socket receiving input. +# +# Results: +# The line of text, without trailing newline + +proc http::GetTextLine {sock} { + set tr [fconfigure $sock -translation] + lassign $tr trRead trWrite + fconfigure $sock -translation [list crlf $trWrite] + set r [BlockingGets $sock] + fconfigure $sock -translation $tr + return $r +} + +# http::BlockingRead +# +# Replacement for a blocking read. +# The caller must be a coroutine. +# Used when we expect to read a chunked-encoding +# chunk of known size. + +proc http::BlockingRead {sock size} { + if {$size < 1} { + return + } + set result {} + while 1 { + set need [expr {$size - [string length $result]}] + set block [read $sock $need] + set eof [expr {[catch {eof $sock} tmp] || $tmp}] + append result $block + if {[string length $result] >= $size || $eof} { + return $result + } else { + yield + } + } +} + +# http::BlockingGets +# +# Replacement for a blocking gets. +# The caller must be a coroutine. +# Empty line is not distinguished from eof. The caller must +# be able to handle this. + +proc http::BlockingGets {sock} { + while 1 { + set count [gets $sock line] + set eof [expr {[catch {eof $sock} tmp] || $tmp}] + if {$count >= 0 || $eof} { + return $line + } else { + yield + } + } +} + +# http::CopyStart +# +# Error handling wrapper around fcopy +# +# Arguments +# sock The socket to copy from +# token The token returned from http::geturl +# +# Side Effects +# This closes the connection upon error + +proc http::CopyStart {sock token {initial 1}} { + upvar 0 $token state + if {[info exists state(transfer)] && $state(transfer) eq "chunked"} { + foreach coding [ContentEncoding $token] { + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. + set coding2 decompress + } else { + set coding2 $coding + } + lappend state(zlib) [zlib stream $coding2] + } + MakeTransformationChunked $sock [namespace code [list CopyChunk $token]] + } else { + if {$initial} { + foreach coding [ContentEncoding $token] { + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. + set coding2 decompress + } else { + set coding2 $coding + } + zlib push $coding2 $sock + } + } + if {[catch { + # FIXME Keep-Alive on https tls::socket with unchunked transfer + # hangs until the server times out. A workaround is possible, as for + # the case without -channel, but it does not use the neat "fcopy" + # solution. + fcopy $sock $state(-channel) -size $state(-blocksize) -command \ + [list http::CopyDone $token] + } err]} { + Finish $token $err + } + } + return +} + +proc http::CopyChunk {token chunk} { + upvar 0 $token state + if {[set count [string length $chunk]]} { + incr state(currentsize) $count + if {[info exists state(zlib)]} { + foreach stream $state(zlib) { + set chunk [$stream add $chunk] + } + } + puts -nonewline $state(-channel) $chunk + if {[info exists state(-progress)]} { + namespace eval :: [linsert $state(-progress) end \ + $token $state(totalsize) $state(currentsize)] + } + } else { + Log "CopyChunk Finish - token $token" + if {[info exists state(zlib)]} { + set excess "" + foreach stream $state(zlib) { + catch { + $stream put -finalize $excess + set excess "" + set overflood "" + while {[set overflood [$stream get]] ne ""} { append excess $overflood } + } + } + puts -nonewline $state(-channel) $excess + foreach stream $state(zlib) { $stream close } + unset state(zlib) + } + Eot $token ;# FIX ME: pipelining. + } + return +} + +# http::CopyDone +# +# fcopy completion callback +# +# Arguments +# token The token returned from http::geturl +# count The amount transferred +# +# Side Effects +# Invokes callbacks + +proc http::CopyDone {token count {error {}}} { + variable $token + upvar 0 $token state + set sock $state(sock) + incr state(currentsize) $count + if {[info exists state(-progress)]} { + namespace eval :: $state(-progress) \ + [list $token $state(totalsize) $state(currentsize)] + } + # At this point the token may have been reset. + if {[string length $error]} { + Finish $token $error + } elseif {[catch {eof $sock} iseof] || $iseof} { + Eot $token + } else { + CopyStart $sock $token 0 + } + return +} + +# http::Eot +# +# Called when either: +# a. An eof condition is detected on the socket. +# b. The client decides that the response is complete. +# c. The client detects an inconsistency and aborts the transaction. +# +# Does: +# 1. Set state(status) +# 2. Reverse any Content-Encoding +# 3. Convert charset encoding and line ends if necessary +# 4. Call http::Finish +# +# Arguments +# token The token returned from http::geturl +# force (previously) optional, has no effect +# reason - "eof" means premature EOF (not EOF as the natural end of +# the response) +# - "" means completion of response, with or without EOF +# - anything else describes an error condition other than +# premature EOF. +# +# Side Effects +# Clean up the socket + +proc http::Eot {token {reason {}}} { + variable $token + upvar 0 $token state + if {$reason eq "eof"} { + # Premature eof. + set state(status) eof + set reason {} + } elseif {$reason ne ""} { + # Abort the transaction. + set state(status) $reason + } else { + # The response is complete. + set state(status) ok + } + + if {[string length $state(body)] > 0} { + if {[catch { + foreach coding [ContentEncoding $token] { + if {$coding eq {deflateX}} { + # First try the standards-compliant choice. + set coding2 decompress + if {[catch {zlib $coding2 $state(body)} result]} { + # If that fails, try the MS non-compliant choice. + set coding2 inflate + set state(body) [zlib $coding2 $state(body)] + } else { + # error {failed at standards-compliant deflate} + set state(body) $result + } + } else { + set state(body) [zlib $coding $state(body)] + } + } + } err]} { + Log "error doing decompression for token $token: $err" + Finish $token $err + return + } + + if {!$state(binary)} { + # If we are getting text, set the incoming channel's encoding + # correctly. iso8859-1 is the RFC default, but this could be any + # IANA charset. However, we only know how to convert what we have + # encodings for. + + set enc [CharsetToEncoding $state(charset)] + if {$enc ne "binary"} { + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + } else { + set state(body) [encoding convertfrom $enc $state(body)] + } + } + + # Translate text line endings. + set state(body) [string map {\r\n \n \r \n} $state(body)] + } + if {[info exists state(-guesstype)] && $state(-guesstype)} { + GuessType $token + } + } + Finish $token $reason + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::GuessType +# ------------------------------------------------------------------------------ +# Command to attempt limited analysis of a resource with undetermined +# Content-Type, i.e. "application/octet-stream". This value can be set for two +# reasons: +# (a) by the server, in a Content-Type header +# (b) by http::geturl, as the default value if the server does not supply a +# Content-Type header. +# +# This command converts a resource if: +# (1) it has type application/octet-stream +# (2) it begins with an XML declaration "?" +# (3) one tag is named "encoding" and has a recognised value; or no "encoding" +# tag exists (defaulting to utf-8) +# +# RFC 9110 Sec. 8.3 states: +# "If a Content-Type header field is not present, the recipient MAY either +# assume a media type of "application/octet-stream" ([RFC2046], Section 4.5.1) +# or examine the data to determine its type." +# +# The RFC goes on to describe the pitfalls of "MIME sniffing", including +# possible security risks. +# +# Arguments: +# token - connection token +# +# Return Value: (boolean) true iff a change has been made +# ------------------------------------------------------------------------------ + +proc http::GuessType {token} { + variable $token + upvar 0 $token state + + if {$state(type) ne {application/octet-stream}} { + return 0 + } + + set body $state(body) + # e.g. { ...} + + if {![regexp -nocase -- {^<[?]xml[[:space:]][^>?]*[?]>} $body match]} { + return 0 + } + # e.g. {} + + set contents [regsub -- {[[:space:]]+} $match { }] + set contents [string range [string tolower $contents] 6 end-2] + # e.g. {version="1.0" encoding="utf-8"} + # without excess whitespace or upper-case letters + + if {![regexp -- {^([^=" ]+="[^"]+" )+$} "$contents "]} { + return 0 + } + # The application/xml default encoding: + set res utf-8 + + set tagList [regexp -all -inline -- {[^=" ]+="[^"]+"} $contents] + foreach tag $tagList { + regexp -- {([^=" ]+)="([^"]+)"} $tag -> name value + if {$name eq {encoding}} { + set res $value + } + } + set enc [CharsetToEncoding $res] + if {$enc eq "binary"} { + return 0 + } + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + } else { + set state(body) [encoding convertfrom $enc $state(body)] + } + set state(body) [string map {\r\n \n \r \n} $state(body)] + set state(type) application/xml + set state(binary) 0 + set state(charset) $res + return 1 +} + + +# http::wait -- +# +# See documentation for details. +# +# Arguments: +# token Connection token. +# +# Results: +# The status after the wait. + +proc http::wait {token} { + variable $token + upvar 0 $token state + + if {![info exists state(status)] || $state(status) eq ""} { + # We must wait on the original variable name, not the upvar alias + vwait ${token}(status) + } + + return [status $token] +} + +# http::formatQuery -- +# +# See documentation for details. Call http::formatQuery with an even +# number of arguments, where the first is a name, the second is a value, +# the third is another name, and so on. +# +# Arguments: +# args A list of name-value pairs. +# +# Results: +# TODO + +proc http::formatQuery {args} { + if {[llength $args] % 2} { + return \ + -code error \ + -errorcode [list HTTP BADARGCNT $args] \ + {Incorrect number of arguments, must be an even number.} + } + set result "" + set sep "" + foreach i $args { + append result $sep [quoteString $i] + if {$sep eq "="} { + set sep & + } else { + set sep = + } + } + return $result +} + +# http::quoteString -- +# +# Do x-www-urlencoded character mapping +# +# Arguments: +# string The string the needs to be encoded +# +# Results: +# The encoded string + +proc http::quoteString {string} { + variable http + variable formMap + + # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use + # a pre-computed map and [string map] to do the conversion (much faster + # than [regsub]/[subst]). [Bug 1020491] + + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] + } else { + set string [encoding convertto $http(-urlencoding) $string] + } + return [string map $formMap $string] +} + +# http::ProxyRequired -- +# Default proxy filter. +# +# Arguments: +# host The destination host +# +# Results: +# The current proxy settings + +proc http::ProxyRequired {host} { + variable http + if {(![info exists http(-proxyhost)]) || ($http(-proxyhost) eq {})} { + return + } + if {![info exists http(-proxyport)] || ($http(-proxyport) eq {})} { + set port 8080 + } else { + set port $http(-proxyport) + } + + # Simple test (cf. autoproxy) for hosts that must be accessed directly, + # not through the proxy server. + foreach domain $http(-proxynot) { + if {[string match -nocase $domain $host]} { + return {} + } + } + return [list $http(-proxyhost) $port] +} + +# http::CharsetToEncoding -- +# +# Tries to map a given IANA charset to a tcl encoding. If no encoding +# can be found, returns binary. +# + +proc http::CharsetToEncoding {charset} { + variable encodings + + set charset [string tolower $charset] + if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} { + set encoding "iso8859-$num" + } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { + set encoding "iso2022-$ext" + } elseif {[regexp {shift[-_]?jis} $charset]} { + set encoding "shiftjis" + } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { + set encoding "cp$num" + } elseif {$charset eq "us-ascii"} { + set encoding "ascii" + } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} { + switch -- $num { + 5 {set encoding "iso8859-9"} + 1 - 2 - 3 { + set encoding "iso8859-$num" + } + default { + set encoding "binary" + } + } + } else { + # other charset, like euc-xx, utf-8,... may directly map to encoding + set encoding $charset + } + set idx [lsearch -exact $encodings $encoding] + if {$idx >= 0} { + return $encoding + } else { + return "binary" + } +} + + +# ------------------------------------------------------------------------------ +# Proc http::ContentEncoding +# ------------------------------------------------------------------------------ +# Return the list of content-encoding transformations we need to do in order. +# + # -------------------------------------------------------------------------- + # Options for Accept-Encoding, Content-Encoding: the switch command + # -------------------------------------------------------------------------- + # The symbol deflateX allows http to attempt both versions of "deflate", + # unless there is a -channel - for a -channel, only "decompress" is tried. + # Alternative/extra lines for switch: + # The standards-compliant version of "deflate" can be chosen with: + # deflate { lappend r decompress } + # The Microsoft non-compliant version of "deflate" can be chosen with: + # deflate { lappend r inflate } + # The previously used implementation of "compress", which appears to be + # incorrect and is rarely used by web servers, can be chosen with: + # compress - x-compress { lappend r decompress } + # -------------------------------------------------------------------------- +# +# Arguments: +# token - Connection token. +# +# Return Value: list +# ------------------------------------------------------------------------------ + +proc http::ContentEncoding {token} { + upvar 0 $token state + set r {} + if {[info exists state(coding)]} { + foreach coding [split $state(coding) ,] { + switch -exact -- $coding { + deflate { lappend r deflateX } + gzip - x-gzip { lappend r gunzip } + identity {} + br { + return -code error\ + "content-encoding \"br\" not implemented" + } + default { + Log "unknown content-encoding \"$coding\" ignored" + } + } + } + } + return $r +} + +proc http::ReceiveChunked {chan command} { + set data "" + set size -1 + yield + while {1} { + chan configure $chan -translation {crlf binary} + while {[gets $chan line] < 1} { yield } + chan configure $chan -translation {binary binary} + if {[scan $line %x size] != 1} { + return -code error "invalid size: \"$line\"" + } + set chunk "" + while {$size && ![chan eof $chan]} { + set part [chan read $chan $size] + incr size -[string length $part] + append chunk $part + } + if {[catch { + uplevel #0 [linsert $command end $chunk] + }]} { + http::Log "Error in callback: $::errorInfo" + } + if {[string length $chunk] == 0} { + # channel might have been closed in the callback + catch {chan event $chan readable {}} + return + } + } +} + +# http::SplitCommaSeparatedFieldValue -- +# Return the individual values of a comma-separated field value. +# +# Arguments: +# fieldValue Comma-separated header field value. +# +# Results: +# List of values. +proc http::SplitCommaSeparatedFieldValue {fieldValue} { + set r {} + foreach el [split $fieldValue ,] { + lappend r [string trim $el] + } + return $r +} + + +# http::GetFieldValue -- +# Return the value of a header field. +# +# Arguments: +# headers Headers key-value list +# fieldName Name of header field whose value to return. +# +# Results: +# The value of the fieldName header field +# +# Field names are matched case-insensitively (RFC 7230 Section 3.2). +# +# If the field is present multiple times, it is assumed that the field is +# defined as a comma-separated list and the values are combined (by separating +# them with commas, see RFC 7230 Section 3.2.2) and returned at once. +proc http::GetFieldValue {headers fieldName} { + set r {} + foreach {field value} $headers { + if {[string equal -nocase $fieldName $field]} { + if {$r eq {}} { + set r $value + } else { + append r ", $value" + } + } + } + return $r +} + +proc http::MakeTransformationChunked {chan command} { + coroutine [namespace current]::dechunk$chan ::http::ReceiveChunked $chan $command + chan event $chan readable [namespace current]::dechunk$chan + return +} + +interp alias {} http::data {} http::responseBody +interp alias {} http::code {} http::responseLine +interp alias {} http::mapReply {} http::quoteString +interp alias {} http::meta {} http::responseHeaders +interp alias {} http::metaValue {} http::responseHeaderValue +interp alias {} http::ncode {} http::responseCode + + +# ------------------------------------------------------------------------------ +# Proc http::socketForTls +# ------------------------------------------------------------------------------ +# Command to use in place of ::socket as the value of ::tls::socketCmd. +# This command does the same as http::socket, and also handles https connections +# through a proxy server. +# +# Notes. +# - The proxy server works differently for https and http. This implementation +# is for https. The proxy for http is implemented in http::CreateToken (in +# code that was previously part of http::geturl). +# - This code implicitly uses the tls options set for https in a call to +# http::register, and does not need to call commands tls::*. This simple +# implementation is possible because tls uses a callback to ::socket that can +# be redirected by changing the value of ::tls::socketCmd. +# +# Arguments: +# args - as for ::socket +# +# Return Value: a socket identifier +# ------------------------------------------------------------------------------ + +proc http::socketForTls {args} { + variable http + set host [lindex $args end-1] + set port [lindex $args end] + if { ($http(-proxyfilter) ne {}) + && (![catch {$http(-proxyfilter) $host} proxy]) + } { + set phost [lindex $proxy 0] + set pport [lindex $proxy 1] + } else { + set phost {} + set pport {} + } + if {$phost eq ""} { + set sock [::http::socket {*}$args] + } else { + set sock [::http::SecureProxyConnect {*}$args $phost $pport] + } + return $sock +} + + +# ------------------------------------------------------------------------------ +# Proc http::SecureProxyConnect +# ------------------------------------------------------------------------------ +# Command to open a socket through a proxy server to a remote server for use by +# tls. The caller must perform the tls handshake. +# +# Notes +# - Based on patch supplied by Melissa Chawla in ticket 1173760, and +# Proxy-Authorization header cf. autoproxy by Pat Thoyts. +# - Rewritten as a call to http::geturl, because response headers and body are +# needed if the CONNECT request fails. CONNECT is implemented for this case +# only, by state(bypass). +# - FUTURE WORK: give http::geturl a -connect option for a general CONNECT. +# - The request header Proxy-Connection is discouraged in RFC 7230 (June 2014), +# RFC 9112 (June 2022). +# +# Arguments: +# args - as for ::socket, ending in host, port; with proxy host, proxy +# port appended. +# +# Return Value: a socket identifier +# ------------------------------------------------------------------------------ + +proc http::SecureProxyConnect {args} { + variable http + variable ConnectVar + variable ConnectCounter + variable failedProxyValues + set varName ::http::ConnectVar([incr ConnectCounter]) + + # Extract (non-proxy) target from args. + set host [lindex $args end-3] + set port [lindex $args end-2] + set args [lreplace $args end-3 end-2] + + # Proxy server URL for connection. + # This determines where the socket is opened. + set phost [lindex $args end-1] + set pport [lindex $args end] + if {[string first : $phost] != -1} { + # IPv6 address, wrap it in [] so we can append :pport + set phost "\[${phost}\]" + } + set url http://${phost}:${pport} + # Elements of args other than host and port are not used when + # AsyncTransaction opens a socket. Those elements are -async and the + # -type $tokenName for the https transaction. Option -async is used by + # AsyncTransaction anyway, and -type $tokenName should not be propagated: + # the proxy request adds its own -type value. + + set targ [lsearch -exact $args -type] + if {$targ != -1} { + # Record in the token that this is a proxy call. + set token [lindex $args $targ+1] + upvar 0 ${token} state + set tim $state(-timeout) + set state(proxyUsed) SecureProxyFailed + # This value is overwritten with "SecureProxy" below if the CONNECT is + # successful. If it is unsuccessful, the socket will be closed + # below, and so in this unsuccessful case there are no other transactions + # whose (proxyUsed) must be updated. + } else { + set tim 0 + } + if {$tim == 0} { + # Do not use infinite timeout for the proxy. + set tim 30000 + } + + # Prepare and send a CONNECT request to the proxy, using + # code similar to http::geturl. + set requestHeaders [list Host $host] + lappend requestHeaders Connection keep-alive + if {$http(-proxyauth) != {}} { + lappend requestHeaders Proxy-Authorization $http(-proxyauth) + } + + set token2 [CreateToken $url -keepalive 0 -timeout $tim \ + -headers $requestHeaders -command [list http::AllDone $varName]] + variable $token2 + upvar 0 $token2 state2 + + # Kludges: + # Setting this variable overrides the HTTP request line and also allows + # -headers to override the Connection: header set by -keepalive. + # The arguments "-keepalive 0" ensure that when Finish is called for an + # unsuccessful request, the socket is always closed. + set state2(bypass) "CONNECT $host:$port HTTP/1.1" + + AsyncTransaction $token2 + + if {[info coroutine] ne {}} { + # All callers in the http package are coroutines launched by + # the event loop. + # The cwait command requires a coroutine because it yields + # to the caller; $varName is traced and the coroutine resumes + # when the variable is written. + cwait $varName + } else { + return -code error {code must run in a coroutine} + # For testing with a non-coroutine caller outside the http package. + # vwait $varName + } + unset $varName + + if { ($state2(state) ne "complete") + || ($state2(status) ne "ok") + || (![string is integer -strict $state2(responseCode)]) + } { + set msg {the HTTP request to the proxy server did not return a valid\ + and complete response} + if {[info exists state2(error)]} { + append msg ": " [lindex $state2(error) 0] + } + cleanup $token2 + return -code error $msg + } + + set code $state2(responseCode) + + if {($code >= 200) && ($code < 300)} { + # All OK. The caller in package tls will now call "tls::import $sock". + # The cleanup command does not close $sock. + # Other tidying was done in http::Event. + + # If this is a persistent socket, any other transactions that are + # already marked to use the socket will have their (proxyUsed) updated + # when http::OpenSocket calls http::ConfigureNewSocket. + set state(proxyUsed) SecureProxy + set sock $state2(sock) + cleanup $token2 + return $sock + } + + if {$targ != -1} { + # Non-OK HTTP status code; token is known because option -type + # (cf. targ) was passed through tcltls, and so the useful + # parts of the proxy's response can be copied to state(*). + # Do not copy state2(sock). + # Return the proxy response to the caller of geturl. + foreach name $failedProxyValues { + if {[info exists state2($name)]} { + set state($name) $state2($name) + } + } + set state(connection) close + set msg "proxy connect failed: $code" + # - This error message will be detected by http::OpenSocket and will + # cause it to present the proxy's HTTP response as that of the + # original $token transaction, identified only by state(proxyUsed) + # as the response of the proxy. + # - The cases where this would mislead the caller of http::geturl are + # given a different value of msg (below) so that http::OpenSocket will + # treat them as errors, but will preserve the $token array for + # inspection by the caller. + # - Status code 305 (Proxy Required) was deprecated for security reasons + # in RFC 2616 (June 1999) and in any case should never be served by a + # proxy. + # - Other 3xx responses from the proxy are inappropriate, and should not + # occur. + # - A 401 response from the proxy is inappropriate, and should not + # occur. It would be confusing if returned to the caller. + + if {($code >= 300) && ($code < 400)} { + set msg "the proxy server responded to the HTTP request with an\ + inappropriate $code redirect" + set loc [responseHeaderValue $token2 location] + if {$loc ne {}} { + append msg "to " $loc + } + } elseif {($code == 401)} { + set msg "the proxy server responded to the HTTP request with an\ + inappropriate 401 request for target-host credentials" + } else { + } + } else { + set msg "connection to proxy failed with status code $code" + } + + # - ${token2}(sock) has already been closed because -keepalive 0. + # - Error return does not pass the socket ID to the + # $token transaction, which retains its socket placeholder. + cleanup $token2 + return -code error $msg +} + +proc http::AllDone {varName args} { + set $varName done + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::socket +# ------------------------------------------------------------------------------ +# This command is a drop-in replacement for ::socket. +# Arguments and return value as for ::socket. +# +# Notes. +# - http::socket is specified in place of ::socket by the definition of urlTypes +# in the namespace header of this file (http.tcl). +# - The command makes a simple call to ::socket unless the user has called +# http::config to change the value of -threadlevel from the default value 0. +# - For -threadlevel 1 or 2, if the Thread package is available, the command +# waits in the event loop while the socket is opened in another thread. This +# is a workaround for bug [824251] - it prevents http::geturl from blocking +# the event loop if the DNS lookup or server connection is slow. +# - FIXME Use a thread pool if connections are very frequent. +# - FIXME The peer thread can transfer the socket only to the main interpreter +# in the present thread. Therefore this code works only if this script runs +# in the main interpreter. In a child interpreter, the parent must alias a +# command to ::http::socket in the child, run http::socket in the parent, +# and then transfer the socket to the child. +# - The http::socket command is simple, and can easily be replaced with an +# alternative command that uses a different technique to open a socket while +# entering the event loop. +# - Unexpected behaviour by thread::send -async (Thread 2.8.6). +# An error in thread::send -async causes return of just the error message +# (not the expected 3 elements), and raises a bgerror in the main thread. +# Hence wrap the command with catch as a precaution. +# ------------------------------------------------------------------------------ + +proc http::socket {args} { + variable ThreadVar + variable ThreadCounter + variable http + + LoadThreadIfNeeded + + set targ [lsearch -exact $args -type] + if {$targ != -1} { + set token [lindex $args $targ+1] + set args [lreplace $args $targ $targ+1] + upvar 0 $token state + } + + if {!$http(usingThread)} { + # Use plain "::socket". This is the default. + return [eval ::socket $args] + } + + set defcmd ::socket + set sockargs $args + set script " + set code \[catch { + [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] + [list ::SockInThread [thread::id] $defcmd $sockargs] + } result opts\] + list \$code \$opts \$result + " + + set state(tid) [thread::create] + set varName ::http::ThreadVar([incr ThreadCounter]) + thread::send -async $state(tid) $script $varName + Log >T Thread Start Wait $args -- coro [info coroutine] $varName + if {[info coroutine] ne {}} { + # All callers in the http package are coroutines launched by + # the event loop. + # The cwait command requires a coroutine because it yields + # to the caller; $varName is traced and the coroutine resumes + # when the variable is written. + cwait $varName + } else { + return -code error {code must run in a coroutine} + # For testing with a non-coroutine caller outside the http package. + # vwait $varName + } + Log >U Thread End Wait $args -- coro [info coroutine] $varName [set $varName] + thread::release $state(tid) + set state(tid) {} + set result [set $varName] + unset $varName + if {(![string is list $result]) || ([llength $result] != 3)} { + return -code error "result from peer thread is not a list of\ + length 3: it is \n$result" + } + lassign $result threadCode threadDict threadResult + if {($threadCode != 0)} { + # This is an error in thread::send. Return the lot. + return -options $threadDict -code error $threadResult + } + + # Now the results of the catch in the peer thread. + lassign $threadResult catchCode errdict sock + + if {($catchCode == 0) && ($sock ni [chan names])} { + return -code error {Transfer of socket from peer thread failed.\ + Check that this script is not running in a child interpreter.} + } + return -options $errdict -code $catchCode $sock +} + +# The commands below are dependencies of http::socket and +# http::SecureProxyConnect and are not used elsewhere. + +# ------------------------------------------------------------------------------ +# Proc http::LoadThreadIfNeeded +# ------------------------------------------------------------------------------ +# Command to load the Thread package if it is needed. If it is needed and not +# loadable, the outcome depends on $http(-threadlevel): +# value 0 => Thread package not required, no problem +# value 1 => operate as if -threadlevel 0 +# value 2 => error return +# +# Arguments: none +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::LoadThreadIfNeeded {} { + variable http + if {$http(usingThread) || ($http(-threadlevel) == 0)} { + return + } + if {[catch {package require Thread}]} { + if {$http(-threadlevel) == 2} { + set msg {[http::config -threadlevel] has value 2,\ + but the Thread package is not available} + return -code error $msg + } + return + } + set http(usingThread) 1 + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::SockInThread +# ------------------------------------------------------------------------------ +# Command http::socket is a ::socket replacement. It defines and runs this +# command, http::SockInThread, in a peer thread. +# +# Arguments: +# caller +# defcmd +# sockargs +# +# Return value: list of values that describe the outcome. The return is +# intended to be a normal (non-error) return in all cases. +# ------------------------------------------------------------------------------ + +proc http::SockInThread {caller defcmd sockargs} { + package require Thread + + set catchCode [catch {eval $defcmd $sockargs} sock errdict] + if {$catchCode == 0} { + set catchCode [catch {thread::transfer $caller $sock; set sock} sock errdict] + } + return [list $catchCode $errdict $sock] +} + + +# ------------------------------------------------------------------------------ +# Proc http::cwaiter::cwait +# ------------------------------------------------------------------------------ +# Command to substitute for vwait, without the ordering issues. +# A command that uses cwait must be a coroutine that is launched by an event, +# e.g. fileevent or after idle, and has no calling code to be resumed upon +# "yield". It cannot return a value. +# +# Arguments: +# varName - fully-qualified name of the variable that the calling script +# will write to resume the coroutine. Any scalar variable or +# array element is permitted. +# coroName - (optional) name of the coroutine to be called when varName is +# written - defaults to this coroutine +# timeout - (optional) timeout value in ms +# timeoutValue - (optional) value to assign to varName if there is a timeout +# +# Return Value: none +# ------------------------------------------------------------------------------ + +namespace eval http::cwaiter { + namespace export cwait + variable log {} + variable logOn 0 +} + +proc http::cwaiter::cwait { + varName {coroName {}} {timeout {}} {timeoutValue {}} +} { + set thisCoro [info coroutine] + if {$thisCoro eq {}} { + return -code error {cwait cannot be called outside a coroutine} + } + if {$coroName eq {}} { + set coroName $thisCoro + } + if {[string range $varName 0 1] ne {::}} { + return -code error {argument varName must be fully qualified} + } + if {$timeout eq {}} { + set toe {} + } elseif {[string is integer -strict $timeout] && ($timeout > 0)} { + set toe [after $timeout [list set $varName $timeoutValue]] + } else { + return -code error {if timeout is supplied it must be a positive integer} + } + + set cmd [list ::http::cwaiter::CwaitHelper $varName $coroName $toe] + trace add variable $varName write $cmd + CoLog "Yield $varName $coroName" + yield + CoLog "Resume $varName $coroName" + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::cwaiter::CwaitHelper +# ------------------------------------------------------------------------------ +# Helper command called by the trace set by cwait. +# - Ignores the arguments added by trace. +# - A simple call to $coroName works, and in error cases gives a suitable stack +# trace, but because it is inside a trace the headline error message is +# something like {can't set "::Result(6)": error}, not the actual +# error. So let the trace command return. +# - Remove the trace immediately. We don't want multiple calls. +# ------------------------------------------------------------------------------ + +proc http::cwaiter::CwaitHelper {varName coroName toe args} { + CoLog "got $varName for $coroName" + set cmd [list ::http::cwaiter::CwaitHelper $varName $coroName $toe] + trace remove variable $varName write $cmd + after cancel $toe + + after 0 $coroName + return +} + + +# ------------------------------------------------------------------------------ +# Proc http::cwaiter::LogInit +# ------------------------------------------------------------------------------ +# Call this command to initiate debug logging and clear the log. +# ------------------------------------------------------------------------------ + +proc http::cwaiter::LogInit {} { + variable log + variable logOn + set log {} + set logOn 1 + return +} + +proc http::cwaiter::LogRead {} { + variable log + return $log +} + +proc http::cwaiter::CoLog {msg} { + variable log + variable logOn + if {$logOn} { + append log $msg \n + } + return +} + +namespace eval http { + namespace import ::http::cwaiter::* +} + +# Local variables: +# indent-tabs-mode: t +# End: diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/md5-2.0.8.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/md5-2.0.8.tm new file mode 100644 index 00000000..f021c0ac --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/md5-2.0.8.tm @@ -0,0 +1,739 @@ +# md5.tcl - Copyright (C) 2003 Pat Thoyts +# +# MD5 defined by RFC 1321, "The MD5 Message-Digest Algorithm" +# HMAC defined by RFC 2104, "Keyed-Hashing for Message Authentication" +# +# This is an implementation of MD5 based upon the example code given in +# RFC 1321 and upon the tcllib MD4 implementation and taking some ideas +# from the earlier tcllib md5 version by Don Libes. +# +# This implementation permits incremental updating of the hash and +# provides support for external compiled implementations either using +# critcl (md5c) or Trf. +# +# ------------------------------------------------------------------------- +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# ------------------------------------------------------------------------- + +package require Tcl 8.2; # tcl minimum version + +namespace eval ::md5 { + variable accel + array set accel {critcl 0 cryptkit 0 trf 0} + + namespace export md5 hmac MD5Init MD5Update MD5Final + + variable uid + if {![info exists uid]} { + set uid 0 + } +} + +# ------------------------------------------------------------------------- + +# MD5Init -- +# +# Create and initialize an MD5 state variable. This will be +# cleaned up when we call MD5Final +# +proc ::md5::MD5Init {} { + variable accel + variable uid + set token [namespace current]::[incr uid] + upvar #0 $token state + + # RFC1321:3.3 - Initialize MD5 state structure + array set state \ + [list \ + A [expr {0x67452301}] \ + B [expr {0xefcdab89}] \ + C [expr {0x98badcfe}] \ + D [expr {0x10325476}] \ + n 0 i "" ] + if {$accel(cryptkit)} { + cryptkit::cryptCreateContext state(ckctx) CRYPT_UNUSED CRYPT_ALGO_MD5 + } elseif {$accel(trf)} { + set s {} + switch -exact -- $::tcl_platform(platform) { + windows { set s [open NUL w] } + unix { set s [open /dev/null w] } + } + if {$s != {}} { + fconfigure $s -translation binary -buffering none + ::md5 -attach $s -mode write \ + -read-type variable \ + -read-destination [subst $token](trfread) \ + -write-type variable \ + -write-destination [subst $token](trfwrite) + array set state [list trfread 0 trfwrite 0 trf $s] + } + } + return $token +} + +# MD5Update -- +# +# This is called to add more data into the hash. You may call this +# as many times as you require. Note that passing in "ABC" is equivalent +# to passing these letters in as separate calls -- hence this proc +# permits hashing of chunked data +# +# If we have a C-based implementation available, then we will use +# it here in preference to the pure-Tcl implementation. +# +proc ::md5::MD5Update {token data} { + variable accel + upvar #0 $token state + + if {$accel(critcl)} { + if {[info exists state(md5c)]} { + set state(md5c) [md5c $data $state(md5c)] + } else { + set state(md5c) [md5c $data] + } + return + } elseif {[info exists state(ckctx)]} { + if {[string length $data] > 0} { + cryptkit::cryptEncrypt $state(ckctx) $data + } + return + } elseif {[info exists state(trf)]} { + puts -nonewline $state(trf) $data + return + } + + # Update the state values + incr state(n) [string length $data] + append state(i) $data + + # Calculate the hash for any complete blocks + set len [string length $state(i)] + for {set n 0} {($n + 64) <= $len} {} { + MD5Hash $token [string range $state(i) $n [incr n 64]] + } + + # Adjust the state for the blocks completed. + set state(i) [string range $state(i) $n end] + return +} + +# MD5Final -- +# +# This procedure is used to close the current hash and returns the +# hash data. Once this procedure has been called the hash context +# is freed and cannot be used again. +# +# Note that the output is 128 bits represented as binary data. +# +proc ::md5::MD5Final {token} { + upvar #0 $token state + + # Check for either of the C-compiled versions. + if {[info exists state(md5c)]} { + set r $state(md5c) + unset state + return $r + } elseif {[info exists state(ckctx)]} { + cryptkit::cryptEncrypt $state(ckctx) "" + cryptkit::cryptGetAttributeString $state(ckctx) \ + CRYPT_CTXINFO_HASHVALUE r 16 + cryptkit::cryptDestroyContext $state(ckctx) + # If nothing was hashed, we get no r variable set! + if {[info exists r]} { + unset state + return $r + } + } elseif {[info exists state(trf)]} { + close $state(trf) + set r $state(trfwrite) + unset state + return $r + } + + # RFC1321:3.1 - Padding + # + set len [string length $state(i)] + set pad [expr {56 - ($len % 64)}] + if {$len % 64 > 56} { + incr pad 64 + } + if {$pad == 0} { + incr pad 64 + } + + #puts "P $pad|bits=[expr {8 * $state(n)}]" + + append state(i) [binary format a$pad \x80] + + # RFC1321:3.2 - Append length in bits as little-endian wide int. + append state(i) [binary format ii [expr {8 * $state(n)}] 0] + + #puts DATA=[Hex $state(i)]([string length $state(i)]) + + # Calculate the hash for the remaining block. + set len [string length $state(i)] + for {set n 0} {($n + 64) <= $len} {} { + MD5Hash $token [string range $state(i) $n [incr n 64]] + } + + #puts md5-post__________________________________________ + #parray ::${token} + + # RFC1321:3.5 - Output + set r [bytes $state(A)][bytes $state(B)][bytes $state(C)][bytes $state(D)] + unset state + + #puts HASH=[Hex $r] + return $r +} + +# ------------------------------------------------------------------------- +# HMAC Hashed Message Authentication (RFC 2104) +# +# hmac = H(K xor opad, H(K xor ipad, text)) +# + +# HMACInit -- +# +# This is equivalent to the MD5Init procedure except that a key is +# added into the algorithm +# +proc ::md5::HMACInit {K} { + + # Key K is adjusted to be 64 bytes long. If K is larger, then use + # the MD5 digest of K and pad this instead. + set len [string length $K] + if {$len > 64} { + set tok [MD5Init] + MD5Update $tok $K + set K [MD5Final $tok] + set len [string length $K] + } + set pad [expr {64 - $len}] + append K [string repeat \0 $pad] + + # Cacluate the padding buffers. + set Ki {} + set Ko {} + binary scan $K i16 Ks + foreach k $Ks { + append Ki [binary format i [expr {$k ^ 0x36363636}]] + append Ko [binary format i [expr {$k ^ 0x5c5c5c5c}]] + } + + set tok [MD5Init] + MD5Update $tok $Ki; # initialize with the inner pad + + # preserve the Ko value for the final stage. + # FRINK: nocheck + set [subst $tok](Ko) $Ko + + return $tok +} + +# HMACUpdate -- +# +# Identical to calling MD5Update +# +proc ::md5::HMACUpdate {token data} { + MD5Update $token $data + return +} + +# HMACFinal -- +# +# This is equivalent to the MD5Final procedure. The hash context is +# closed and the binary representation of the hash result is returned. +# +proc ::md5::HMACFinal {token} { + upvar #0 $token state + + set tok [MD5Init]; # init the outer hashing function + MD5Update $tok $state(Ko); # prepare with the outer pad. + MD5Update $tok [MD5Final $token]; # hash the inner result + return [MD5Final $tok] +} + +# ------------------------------------------------------------------------- +# Description: +# This is the core MD5 algorithm. It is a lot like the MD4 algorithm but +# includes an extra round and a set of constant modifiers throughout. +# +# Note: +# This function body is substituted later on to inline some of the +# procedures and to make is a bit more comprehensible. +# +set ::md5::MD5Hash_body { + variable $token + upvar 0 $token state + + #puts TR__=[Hex $msg]([string length $msg]) + + # RFC1321:3.4 - Process Message in 16-Word Blocks + binary scan $msg i* blocks + foreach {X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15} $blocks { + #puts BL + + set A $state(A) + set B $state(B) + set C $state(C) + set D $state(D) + + # Round 1 + # Let [abcd k s i] denote the operation + # a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). + # Do the following 16 operations. + # [ABCD 0 7 1] [DABC 1 12 2] [CDAB 2 17 3] [BCDA 3 22 4] + set A [expr {$B + (($A + [F $B $C $D] + $X0 + $T01) <<< 7)}] + set D [expr {$A + (($D + [F $A $B $C] + $X1 + $T02) <<< 12)}] + set C [expr {$D + (($C + [F $D $A $B] + $X2 + $T03) <<< 17)}] + set B [expr {$C + (($B + [F $C $D $A] + $X3 + $T04) <<< 22)}] + # [ABCD 4 7 5] [DABC 5 12 6] [CDAB 6 17 7] [BCDA 7 22 8] + set A [expr {$B + (($A + [F $B $C $D] + $X4 + $T05) <<< 7)}] + set D [expr {$A + (($D + [F $A $B $C] + $X5 + $T06) <<< 12)}] + set C [expr {$D + (($C + [F $D $A $B] + $X6 + $T07) <<< 17)}] + set B [expr {$C + (($B + [F $C $D $A] + $X7 + $T08) <<< 22)}] + # [ABCD 8 7 9] [DABC 9 12 10] [CDAB 10 17 11] [BCDA 11 22 12] + set A [expr {$B + (($A + [F $B $C $D] + $X8 + $T09) <<< 7)}] + set D [expr {$A + (($D + [F $A $B $C] + $X9 + $T10) <<< 12)}] + set C [expr {$D + (($C + [F $D $A $B] + $X10 + $T11) <<< 17)}] + set B [expr {$C + (($B + [F $C $D $A] + $X11 + $T12) <<< 22)}] + # [ABCD 12 7 13] [DABC 13 12 14] [CDAB 14 17 15] [BCDA 15 22 16] + set A [expr {$B + (($A + [F $B $C $D] + $X12 + $T13) <<< 7)}] + set D [expr {$A + (($D + [F $A $B $C] + $X13 + $T14) <<< 12)}] + set C [expr {$D + (($C + [F $D $A $B] + $X14 + $T15) <<< 17)}] + set B [expr {$C + (($B + [F $C $D $A] + $X15 + $T16) <<< 22)}] + + # Round 2. + # Let [abcd k s i] denote the operation + # a = b + ((a + G(b,c,d) + X[k] + Ti) <<< s) + # Do the following 16 operations. + # [ABCD 1 5 17] [DABC 6 9 18] [CDAB 11 14 19] [BCDA 0 20 20] + set A [expr {$B + (($A + [G $B $C $D] + $X1 + $T17) <<< 5)}] + set D [expr {$A + (($D + [G $A $B $C] + $X6 + $T18) <<< 9)}] + set C [expr {$D + (($C + [G $D $A $B] + $X11 + $T19) <<< 14)}] + set B [expr {$C + (($B + [G $C $D $A] + $X0 + $T20) <<< 20)}] + # [ABCD 5 5 21] [DABC 10 9 22] [CDAB 15 14 23] [BCDA 4 20 24] + set A [expr {$B + (($A + [G $B $C $D] + $X5 + $T21) <<< 5)}] + set D [expr {$A + (($D + [G $A $B $C] + $X10 + $T22) <<< 9)}] + set C [expr {$D + (($C + [G $D $A $B] + $X15 + $T23) <<< 14)}] + set B [expr {$C + (($B + [G $C $D $A] + $X4 + $T24) <<< 20)}] + # [ABCD 9 5 25] [DABC 14 9 26] [CDAB 3 14 27] [BCDA 8 20 28] + set A [expr {$B + (($A + [G $B $C $D] + $X9 + $T25) <<< 5)}] + set D [expr {$A + (($D + [G $A $B $C] + $X14 + $T26) <<< 9)}] + set C [expr {$D + (($C + [G $D $A $B] + $X3 + $T27) <<< 14)}] + set B [expr {$C + (($B + [G $C $D $A] + $X8 + $T28) <<< 20)}] + # [ABCD 13 5 29] [DABC 2 9 30] [CDAB 7 14 31] [BCDA 12 20 32] + set A [expr {$B + (($A + [G $B $C $D] + $X13 + $T29) <<< 5)}] + set D [expr {$A + (($D + [G $A $B $C] + $X2 + $T30) <<< 9)}] + set C [expr {$D + (($C + [G $D $A $B] + $X7 + $T31) <<< 14)}] + set B [expr {$C + (($B + [G $C $D $A] + $X12 + $T32) <<< 20)}] + + # Round 3. + # Let [abcd k s i] denote the operation + # a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s) + # Do the following 16 operations. + # [ABCD 5 4 33] [DABC 8 11 34] [CDAB 11 16 35] [BCDA 14 23 36] + set A [expr {$B + (($A + [H $B $C $D] + $X5 + $T33) <<< 4)}] + set D [expr {$A + (($D + [H $A $B $C] + $X8 + $T34) <<< 11)}] + set C [expr {$D + (($C + [H $D $A $B] + $X11 + $T35) <<< 16)}] + set B [expr {$C + (($B + [H $C $D $A] + $X14 + $T36) <<< 23)}] + # [ABCD 1 4 37] [DABC 4 11 38] [CDAB 7 16 39] [BCDA 10 23 40] + set A [expr {$B + (($A + [H $B $C $D] + $X1 + $T37) <<< 4)}] + set D [expr {$A + (($D + [H $A $B $C] + $X4 + $T38) <<< 11)}] + set C [expr {$D + (($C + [H $D $A $B] + $X7 + $T39) <<< 16)}] + set B [expr {$C + (($B + [H $C $D $A] + $X10 + $T40) <<< 23)}] + # [ABCD 13 4 41] [DABC 0 11 42] [CDAB 3 16 43] [BCDA 6 23 44] + set A [expr {$B + (($A + [H $B $C $D] + $X13 + $T41) <<< 4)}] + set D [expr {$A + (($D + [H $A $B $C] + $X0 + $T42) <<< 11)}] + set C [expr {$D + (($C + [H $D $A $B] + $X3 + $T43) <<< 16)}] + set B [expr {$C + (($B + [H $C $D $A] + $X6 + $T44) <<< 23)}] + # [ABCD 9 4 45] [DABC 12 11 46] [CDAB 15 16 47] [BCDA 2 23 48] + set A [expr {$B + (($A + [H $B $C $D] + $X9 + $T45) <<< 4)}] + set D [expr {$A + (($D + [H $A $B $C] + $X12 + $T46) <<< 11)}] + set C [expr {$D + (($C + [H $D $A $B] + $X15 + $T47) <<< 16)}] + set B [expr {$C + (($B + [H $C $D $A] + $X2 + $T48) <<< 23)}] + + # Round 4. + # Let [abcd k s i] denote the operation + # a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s) + # Do the following 16 operations. + # [ABCD 0 6 49] [DABC 7 10 50] [CDAB 14 15 51] [BCDA 5 21 52] + set A [expr {$B + (($A + [I $B $C $D] + $X0 + $T49) <<< 6)}] + set D [expr {$A + (($D + [I $A $B $C] + $X7 + $T50) <<< 10)}] + set C [expr {$D + (($C + [I $D $A $B] + $X14 + $T51) <<< 15)}] + set B [expr {$C + (($B + [I $C $D $A] + $X5 + $T52) <<< 21)}] + # [ABCD 12 6 53] [DABC 3 10 54] [CDAB 10 15 55] [BCDA 1 21 56] + set A [expr {$B + (($A + [I $B $C $D] + $X12 + $T53) <<< 6)}] + set D [expr {$A + (($D + [I $A $B $C] + $X3 + $T54) <<< 10)}] + set C [expr {$D + (($C + [I $D $A $B] + $X10 + $T55) <<< 15)}] + set B [expr {$C + (($B + [I $C $D $A] + $X1 + $T56) <<< 21)}] + # [ABCD 8 6 57] [DABC 15 10 58] [CDAB 6 15 59] [BCDA 13 21 60] + set A [expr {$B + (($A + [I $B $C $D] + $X8 + $T57) <<< 6)}] + set D [expr {$A + (($D + [I $A $B $C] + $X15 + $T58) <<< 10)}] + set C [expr {$D + (($C + [I $D $A $B] + $X6 + $T59) <<< 15)}] + set B [expr {$C + (($B + [I $C $D $A] + $X13 + $T60) <<< 21)}] + # [ABCD 4 6 61] [DABC 11 10 62] [CDAB 2 15 63] [BCDA 9 21 64] + set A [expr {$B + (($A + [I $B $C $D] + $X4 + $T61) <<< 6)}] + set D [expr {$A + (($D + [I $A $B $C] + $X11 + $T62) <<< 10)}] + set C [expr {$D + (($C + [I $D $A $B] + $X2 + $T63) <<< 15)}] + set B [expr {$C + (($B + [I $C $D $A] + $X9 + $T64) <<< 21)}] + + # Then perform the following additions. (That is, increment each + # of the four registers by the value it had before this block + # was started.) + incr state(A) $A + incr state(B) $B + incr state(C) $C + incr state(D) $D + } + + return +} + +proc ::md5::byte {n v} {expr {((0xFF << (8 * $n)) & $v) >> (8 * $n)}} +proc ::md5::bytes {v} { + #format %c%c%c%c [byte 0 $v] [byte 1 $v] [byte 2 $v] [byte 3 $v] + format %c%c%c%c \ + [expr {0xFF & $v}] \ + [expr {(0xFF00 & $v) >> 8}] \ + [expr {(0xFF0000 & $v) >> 16}] \ + [expr {((0xFF000000 & $v) >> 24) & 0xFF}] +} + +# 32bit rotate-left +proc ::md5::<<< {v n} { + return [expr {((($v << $n) \ + | (($v >> (32 - $n)) \ + & (0x7FFFFFFF >> (31 - $n))))) \ + & 0xFFFFFFFF}] +} + +# Convert our <<< pseudo-operator into a procedure call. +regsub -all -line \ + {\[expr {(\$[ABCD]) \+ \(\((.*)\)\s+<<<\s+(\d+)\)}\]} \ + $::md5::MD5Hash_body \ + {[expr {int(\1 + [<<< [expr {\2}] \3])}]} \ + ::md5::MD5Hash_body + +# RFC1321:3.4 - function F +proc ::md5::F {X Y Z} { + return [expr {($X & $Y) | ((~$X) & $Z)}] +} + +# Inline the F function +regsub -all -line \ + {\[F (\$[ABCD]) (\$[ABCD]) (\$[ABCD])\]} \ + $::md5::MD5Hash_body \ + {( (\1 \& \2) | ((~\1) \& \3) )} \ + ::md5::MD5Hash_body + +# RFC1321:3.4 - function G +proc ::md5::G {X Y Z} { + return [expr {(($X & $Z) | ($Y & (~$Z)))}] +} + +# Inline the G function +regsub -all -line \ + {\[G (\$[ABCD]) (\$[ABCD]) (\$[ABCD])\]} \ + $::md5::MD5Hash_body \ + {(((\1 \& \3) | (\2 \& (~\3))))} \ + ::md5::MD5Hash_body + +# RFC1321:3.4 - function H +proc ::md5::H {X Y Z} { + return [expr {$X ^ $Y ^ $Z}] +} + +# Inline the H function +regsub -all -line \ + {\[H (\$[ABCD]) (\$[ABCD]) (\$[ABCD])\]} \ + $::md5::MD5Hash_body \ + {(\1 ^ \2 ^ \3)} \ + ::md5::MD5Hash_body + +# RFC1321:3.4 - function I +proc ::md5::I {X Y Z} { + return [expr {$Y ^ ($X | (~$Z))}] +} + +# Inline the I function +regsub -all -line \ + {\[I (\$[ABCD]) (\$[ABCD]) (\$[ABCD])\]} \ + $::md5::MD5Hash_body \ + {(\2 ^ (\1 | (~\3)))} \ + ::md5::MD5Hash_body + + +# RFC 1321:3.4 step 4: inline the set of constant modifiers. +namespace eval md5 { + variable tName + variable tVal + variable map + foreach tName { + T01 T02 T03 T04 T05 T06 T07 T08 T09 T10 + T11 T12 T13 T14 T15 T16 T17 T18 T19 T20 + T21 T22 T23 T24 T25 T26 T27 T28 T29 T30 + T31 T32 T33 T34 T35 T36 T37 T38 T39 T40 + T41 T42 T43 T44 T45 T46 T47 T48 T49 T50 + T51 T52 T53 T54 T55 T56 T57 T58 T59 T60 + T61 T62 T63 T64 + } tVal { + 0xd76aa478 0xe8c7b756 0x242070db 0xc1bdceee + 0xf57c0faf 0x4787c62a 0xa8304613 0xfd469501 + 0x698098d8 0x8b44f7af 0xffff5bb1 0x895cd7be + 0x6b901122 0xfd987193 0xa679438e 0x49b40821 + + 0xf61e2562 0xc040b340 0x265e5a51 0xe9b6c7aa + 0xd62f105d 0x2441453 0xd8a1e681 0xe7d3fbc8 + 0x21e1cde6 0xc33707d6 0xf4d50d87 0x455a14ed + 0xa9e3e905 0xfcefa3f8 0x676f02d9 0x8d2a4c8a + + 0xfffa3942 0x8771f681 0x6d9d6122 0xfde5380c + 0xa4beea44 0x4bdecfa9 0xf6bb4b60 0xbebfbc70 + 0x289b7ec6 0xeaa127fa 0xd4ef3085 0x4881d05 + 0xd9d4d039 0xe6db99e5 0x1fa27cf8 0xc4ac5665 + + 0xf4292244 0x432aff97 0xab9423a7 0xfc93a039 + 0x655b59c3 0x8f0ccc92 0xffeff47d 0x85845dd1 + 0x6fa87e4f 0xfe2ce6e0 0xa3014314 0x4e0811a1 + 0xf7537e82 0xbd3af235 0x2ad7d2bb 0xeb86d391 + } { + lappend map \$$tName $tVal + } + set ::md5::MD5Hash_body [string map $map $::md5::MD5Hash_body] + unset map tName tVal +} + +# Define the MD5 hashing procedure with inline functions. +proc ::md5::MD5Hash {token msg} $::md5::MD5Hash_body +unset ::md5::MD5Hash_body + +# ------------------------------------------------------------------------- + +if {[package provide Trf] != {}} { + interp alias {} ::md5::Hex {} ::hex -mode encode -- +} else { + proc ::md5::Hex {data} { + binary scan $data H* result + return [string toupper $result] + } +} + +# ------------------------------------------------------------------------- + +# LoadAccelerator -- +# +# This package can make use of a number of compiled extensions to +# accelerate the digest computation. This procedure manages the +# use of these extensions within the package. During normal usage +# this should not be called, but the test package manipulates the +# list of enabled accelerators. +# +proc ::md5::LoadAccelerator {name} { + variable accel + set r 0 + switch -exact -- $name { + critcl { + if {![catch {package require tcllibc}] + || ![catch {package require md5c}]} { + set r [expr {[info commands ::md5::md5c] != {}}] + } + } + cryptkit { + if {![catch {package require cryptkit}]} { + set r [expr {![catch {cryptkit::cryptInit}]}] + } + } + trf { + if {![catch {package require Trf}]} { + set r [expr {![catch {::md5 aa} msg]}] + } + } + default { + return -code error "invalid accelerator package:\ + must be one of [join [array names accel] {, }]" + } + } + set accel($name) $r +} + +# ------------------------------------------------------------------------- + +# Description: +# Pop the nth element off a list. Used in options processing. +# +proc ::md5::Pop {varname {nth 0}} { + upvar $varname args + set r [lindex $args $nth] + set args [lreplace $args $nth $nth] + return $r +} + +# ------------------------------------------------------------------------- + +# fileevent handler for chunked file hashing. +# +proc ::md5::Chunk {token channel {chunksize 4096}} { + upvar #0 $token state + + if {[eof $channel]} { + fileevent $channel readable {} + set state(reading) 0 + } + + MD5Update $token [read $channel $chunksize] +} + +# ------------------------------------------------------------------------- + +proc ::md5::md5 {args} { + array set opts {-hex 0 -filename {} -channel {} -chunksize 4096} + while {[string match -* [set option [lindex $args 0]]]} { + switch -glob -- $option { + -hex { set opts(-hex) 1 } + -file* { set opts(-filename) [Pop args 1] } + -channel { set opts(-channel) [Pop args 1] } + -chunksize { set opts(-chunksize) [Pop args 1] } + default { + if {[llength $args] == 1} { break } + if {[string compare $option "--"] == 0} { Pop args; break } + set err [join [lsort [array names opts]] ", "] + return -code error "bad option $option:\ + must be one of $err\nlen: [llength $args]" + } + } + Pop args + } + + if {$opts(-filename) != {}} { + set opts(-channel) [open $opts(-filename) r] + fconfigure $opts(-channel) -translation binary + } + + if {$opts(-channel) == {}} { + + if {[llength $args] != 1} { + return -code error "wrong # args:\ + should be \"md5 ?-hex? -filename file | string\"" + } + set tok [MD5Init] + + #puts md5_______________________________________________ + #parray ::${tok} + + #puts IN=(([lindex $args 0])) + MD5Update $tok [lindex $args 0] + + #puts md5-final_________________________________________ + #parray ::${tok} + + set r [MD5Final $tok] + + } else { + + set tok [MD5Init] + # FRINK: nocheck + set [subst $tok](reading) 1 + fileevent $opts(-channel) readable \ + [list [namespace origin Chunk] \ + $tok $opts(-channel) $opts(-chunksize)] + vwait [subst $tok](reading) + set r [MD5Final $tok] + + # If we opened the channel - we should close it too. + if {$opts(-filename) != {}} { + close $opts(-channel) + } + } + + if {$opts(-hex)} { + set r [Hex $r] + } + return $r +} + +# ------------------------------------------------------------------------- + +proc ::md5::hmac {args} { + array set opts {-hex 0 -filename {} -channel {} -chunksize 4096} + while {[string match -* [set option [lindex $args 0]]]} { + switch -glob -- $option { + -key { set opts(-key) [Pop args 1] } + -hex { set opts(-hex) 1 } + -file* { set opts(-filename) [Pop args 1] } + -channel { set opts(-channel) [Pop args 1] } + -chunksize { set opts(-chunksize) [Pop args 1] } + default { + if {[llength $args] == 1} { break } + if {[string compare $option "--"] == 0} { Pop args; break } + set err [join [lsort [array names opts]] ", "] + return -code error "bad option $option:\ + must be one of $err" + } + } + Pop args + } + + if {![info exists opts(-key)]} { + return -code error "wrong # args:\ + should be \"hmac ?-hex? -key key -filename file | string\"" + } + + if {$opts(-filename) != {}} { + set opts(-channel) [open $opts(-filename) r] + fconfigure $opts(-channel) -translation binary + } + + if {$opts(-channel) == {}} { + + if {[llength $args] != 1} { + return -code error "wrong # args:\ + should be \"hmac ?-hex? -key key -filename file | string\"" + } + set tok [HMACInit $opts(-key)] + HMACUpdate $tok [lindex $args 0] + set r [HMACFinal $tok] + + } else { + + set tok [HMACInit $opts(-key)] + # FRINK: nocheck + set [subst $tok](reading) 1 + fileevent $opts(-channel) readable \ + [list [namespace origin Chunk] \ + $tok $opts(-channel) $opts(-chunksize)] + vwait [subst $tok](reading) + set r [HMACFinal $tok] + + # If we opened the channel - we should close it too. + if {$opts(-filename) != {}} { + close $opts(-channel) + } + } + + if {$opts(-hex)} { + set r [Hex $r] + } + return $r +} + +# ------------------------------------------------------------------------- + +# Try and load a compiled extension to help. +namespace eval ::md5 { + variable e + foreach e {critcl cryptkit trf} { if {[LoadAccelerator $e]} { break } } + unset e +} + +package provide md5 2.0.8 + +# ------------------------------------------------------------------------- +# Local Variables: +# mode: tcl +# indent-tabs-mode: nil +# End: + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.0.tm new file mode 100644 index 00000000..fa460769 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.0.tm @@ -0,0 +1,3942 @@ +# mime.tcl - MIME body parts +# +# (c) 1999-2000 Marshall T. Rose +# (c) 2000 Brent Welch +# (c) 2000 Sandeep Tamhankar +# (c) 2000 Dan Kuchler +# (c) 2000-2001 Eric Melski +# (c) 2001 Jeff Hobbs +# (c) 2001-2008 Andreas Kupries +# (c) 2002-2003 David Welton +# (c) 2003-2008 Pat Thoyts +# (c) 2005 Benjamin Riefenstahl +# (c) 2013-2021 Poor Yorick +# +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# Influenced by Borenstein's/Rose's safe-tcl (circa 1993) and Darren New's +# unpublished package of 1999. +# + +# new string features and inline scan are used, requiring 8.3. +package require Tcl 8.5 + +package provide mime 1.7.0 +package require tcl::chan::memchan + + +if {[catch {package require Trf 2.0}]} { + + # Fall-back to tcl-based procedures of base64 and quoted-printable + # encoders + ## + # Warning! + ## + # These are a fragile emulations of the more general calling + # sequence that appears to work with this code here. + ## + # The `__ignored__` arguments are expected to be `--` options on + # the caller's side. (See the uses in `copymessageaux`, + # `buildmessageaux`, `parsepart`, and `getbody`). + + package require base64 2.0 + set ::major [lindex [split [package require md5] .] 0] + + # Create these commands in the mime namespace so that they + # won't collide with things at the global namespace level + + namespace eval ::mime { + proc base64 {-mode what __ignored__ chunk} { + return [base64::$what $chunk] + } + proc quoted-printable {-mode what __ignored__ chunk} { + return [mime::qp_$what $chunk] + } + + if {$::major < 2} { + # md5 v1, result is hex string ready for use. + proc md5 {__ignored__ string} { + return [md5::md5 $string] + } + } else { + # md5 v2, need option to get hex string + proc md5 {__ignored__ string} { + return [md5::md5 -hex $string] + } + } + } + + unset ::major +} + +# +# state variables: +# +# canonicalP: input is in its canonical form +# content: type/subtype +# params: dictionary (keys are lower-case) +# encoding: transfer encoding +# version: MIME-version +# header: dictionary (keys are lower-case) +# lowerL: list of header keys, lower-case +# mixedL: list of header keys, mixed-case +# value: either "file", "parts", or "string" +# +# file: input file +# fd: cached file-descriptor, typically for root +# root: token for top-level part, for (distant) subordinates +# offset: number of octets from beginning of file/string +# count: length in octets of (encoded) content +# +# parts: list of bodies (tokens) +# +# string: input string +# +# cid: last child-id assigned +# + + +namespace eval ::mime { + variable mime + array set mime {uid 0 cid 0} + + # RFC 822 lexemes + variable addrtokenL + lappend addrtokenL \; , < > : . ( ) @ \" \[ ] \\ + variable addrlexemeL { + LX_SEMICOLON LX_COMMA + LX_LBRACKET LX_RBRACKET + LX_COLON LX_DOT + LX_LPAREN LX_RPAREN + LX_ATSIGN LX_QUOTE + LX_LSQUARE LX_RSQUARE + LX_QUOTE + } + + # RFC 2045 lexemes + variable typetokenL + lappend typetokenL \; , < > : ? ( ) @ \" \[ \] = / \\ + variable typelexemeL { + LX_SEMICOLON LX_COMMA + LX_LBRACKET LX_RBRACKET + LX_COLON LX_QUESTION + LX_LPAREN LX_RPAREN + LX_ATSIGN LX_QUOTE + LX_LSQUARE LX_RSQUARE + LX_EQUALS LX_SOLIDUS + LX_QUOTE + } + + variable encList { + ascii US-ASCII + big5 Big5 + cp1250 Windows-1250 + cp1251 Windows-1251 + cp1252 Windows-1252 + cp1253 Windows-1253 + cp1254 Windows-1254 + cp1255 Windows-1255 + cp1256 Windows-1256 + cp1257 Windows-1257 + cp1258 Windows-1258 + cp437 IBM437 + cp737 {} + cp775 IBM775 + cp850 IBM850 + cp852 IBM852 + cp855 IBM855 + cp857 IBM857 + cp860 IBM860 + cp861 IBM861 + cp862 IBM862 + cp863 IBM863 + cp864 IBM864 + cp865 IBM865 + cp866 IBM866 + cp869 IBM869 + cp874 {} + cp932 {} + cp936 GBK + cp949 {} + cp950 {} + dingbats {} + ebcdic {} + euc-cn EUC-CN + euc-jp EUC-JP + euc-kr EUC-KR + gb12345 GB12345 + gb1988 GB1988 + gb2312 GB2312 + iso2022 ISO-2022 + iso2022-jp ISO-2022-JP + iso2022-kr ISO-2022-KR + iso8859-1 ISO-8859-1 + iso8859-2 ISO-8859-2 + iso8859-3 ISO-8859-3 + iso8859-4 ISO-8859-4 + iso8859-5 ISO-8859-5 + iso8859-6 ISO-8859-6 + iso8859-7 ISO-8859-7 + iso8859-8 ISO-8859-8 + iso8859-9 ISO-8859-9 + iso8859-10 ISO-8859-10 + iso8859-13 ISO-8859-13 + iso8859-14 ISO-8859-14 + iso8859-15 ISO-8859-15 + iso8859-16 ISO-8859-16 + jis0201 JIS_X0201 + jis0208 JIS_C6226-1983 + jis0212 JIS_X0212-1990 + koi8-r KOI8-R + koi8-u KOI8-U + ksc5601 KS_C_5601-1987 + macCentEuro {} + macCroatian {} + macCyrillic {} + macDingbats {} + macGreek {} + macIceland {} + macJapan {} + macRoman {} + macRomania {} + macThai {} + macTurkish {} + macUkraine {} + shiftjis Shift_JIS + symbol {} + tis-620 TIS-620 + unicode {} + utf-8 UTF-8 + } + + variable encodings + array set encodings $encList + variable reversemap + # Initialized at the bottom of the file + + variable encAliasList { + ascii ANSI_X3.4-1968 + ascii iso-ir-6 + ascii ANSI_X3.4-1986 + ascii ISO_646.irv:1991 + ascii ASCII + ascii ISO646-US + ascii us + ascii IBM367 + ascii cp367 + cp437 cp437 + cp437 437 + cp775 cp775 + cp850 cp850 + cp850 850 + cp852 cp852 + cp852 852 + cp855 cp855 + cp855 855 + cp857 cp857 + cp857 857 + cp860 cp860 + cp860 860 + cp861 cp861 + cp861 861 + cp861 cp-is + cp862 cp862 + cp862 862 + cp863 cp863 + cp863 863 + cp864 cp864 + cp865 cp865 + cp865 865 + cp866 cp866 + cp866 866 + cp869 cp869 + cp869 869 + cp869 cp-gr + cp936 CP936 + cp936 MS936 + cp936 Windows-936 + iso8859-1 ISO_8859-1:1987 + iso8859-1 iso-ir-100 + iso8859-1 ISO_8859-1 + iso8859-1 latin1 + iso8859-1 l1 + iso8859-1 IBM819 + iso8859-1 CP819 + iso8859-2 ISO_8859-2:1987 + iso8859-2 iso-ir-101 + iso8859-2 ISO_8859-2 + iso8859-2 latin2 + iso8859-2 l2 + iso8859-3 ISO_8859-3:1988 + iso8859-3 iso-ir-109 + iso8859-3 ISO_8859-3 + iso8859-3 latin3 + iso8859-3 l3 + iso8859-4 ISO_8859-4:1988 + iso8859-4 iso-ir-110 + iso8859-4 ISO_8859-4 + iso8859-4 latin4 + iso8859-4 l4 + iso8859-5 ISO_8859-5:1988 + iso8859-5 iso-ir-144 + iso8859-5 ISO_8859-5 + iso8859-5 cyrillic + iso8859-6 ISO_8859-6:1987 + iso8859-6 iso-ir-127 + iso8859-6 ISO_8859-6 + iso8859-6 ECMA-114 + iso8859-6 ASMO-708 + iso8859-6 arabic + iso8859-7 ISO_8859-7:1987 + iso8859-7 iso-ir-126 + iso8859-7 ISO_8859-7 + iso8859-7 ELOT_928 + iso8859-7 ECMA-118 + iso8859-7 greek + iso8859-7 greek8 + iso8859-8 ISO_8859-8:1988 + iso8859-8 iso-ir-138 + iso8859-8 ISO_8859-8 + iso8859-8 hebrew + iso8859-9 ISO_8859-9:1989 + iso8859-9 iso-ir-148 + iso8859-9 ISO_8859-9 + iso8859-9 latin5 + iso8859-9 l5 + iso8859-10 iso-ir-157 + iso8859-10 l6 + iso8859-10 ISO_8859-10:1992 + iso8859-10 latin6 + iso8859-14 iso-ir-199 + iso8859-14 ISO_8859-14:1998 + iso8859-14 ISO_8859-14 + iso8859-14 latin8 + iso8859-14 iso-celtic + iso8859-14 l8 + iso8859-15 ISO_8859-15 + iso8859-15 Latin-9 + iso8859-16 iso-ir-226 + iso8859-16 ISO_8859-16:2001 + iso8859-16 ISO_8859-16 + iso8859-16 latin10 + iso8859-16 l10 + jis0201 X0201 + jis0208 iso-ir-87 + jis0208 x0208 + jis0208 JIS_X0208-1983 + jis0212 x0212 + jis0212 iso-ir-159 + ksc5601 iso-ir-149 + ksc5601 KS_C_5601-1989 + ksc5601 KSC5601 + ksc5601 korean + shiftjis MS_Kanji + utf-8 UTF8 + } + + namespace export {*}{ + copymessage finalize getbody getheader getproperty initialize + mapencoding parseaddress parsedatetime reversemapencoding setheader + uniqueID + } +} + +# ::mime::initialize -- +# +# Creates a MIME part, and returnes the MIME token for that part. +# +# Arguments: +# args Args can be any one of the following: +# ?-canonical type/subtype +# ?-param {key value}?... +# ?-encoding value? +# ?-header {key value}?... ? +# (-file name | -string value | -parts {token1 ... tokenN}) +# +# If the -canonical option is present, then the body is in +# canonical (raw) form and is found by consulting either the -file, +# -string, or -parts option. +# +# In addition, both the -param and -header options may occur zero +# or more times to specify "Content-Type" parameters (e.g., +# "charset") and header keyword/values (e.g., +# "Content-Disposition"), respectively. +# +# Also, -encoding, if present, specifies the +# "Content-Transfer-Encoding" when copying the body. +# +# If the -canonical option is not present, then the MIME part +# contained in either the -file or the -string option is parsed, +# dynamically generating subordinates as appropriate. +# +# Results: +# An initialized mime token. + +proc ::mime::initialize args { + global errorCode errorInfo + + variable mime + + set token [namespace current]::[incr mime(uid)] + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[catch [list mime::initializeaux $token {*}$args] result eopts]} { + catch {mime::finalize $token -subordinates dynamic} + return -options $eopts $result + } + return $token +} + +# ::mime::initializeaux -- +# +# Configures the MIME token created in mime::initialize based on +# the arguments that mime::initialize supports. +# +# Arguments: +# token The MIME token to configure. +# args Args can be any one of the following: +# ?-canonical type/subtype +# ?-param {key value}?... +# ?-encoding value? +# ?-header {key value}?... ? +# (-file name | -string value | -parts {token1 ... tokenN}) +# +# Results: +# Either configures the mime token, or throws an error. + +proc ::mime::initializeaux {token args} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set params [set state(params) {}] + set state(encoding) {} + set state(version) 1.0 + + set state(header) {} + set state(lowerL) {} + set state(mixedL) {} + + set state(cid) 0 + + set userheader 0 + + set argc [llength $args] + for {set argx 0} {$argx < $argc} {incr argx} { + set option [lindex $args $argx] + if {[incr argx] >= $argc} { + error "missing argument to $option" + } + set value [lindex $args $argx] + + switch -- $option { + -canonical { + set state(content) [string tolower $value] + } + + -param { + if {[llength $value] != 2} { + error "-param expects a key and a value, not $value" + } + set lower [string tolower [set mixed [lindex $value 0]]] + if {[info exists params($lower)]} { + error "the $mixed parameter may be specified at most once" + } + + set params($lower) [lindex $value 1] + set state(params) [array get params] + } + + -encoding { + set value [string tolower $value[set value {}]] + + switch -- $value { + 7bit - 8bit - binary - quoted-printable - base64 { + } + + default { + error "unknown value for -encoding $state(encoding)" + } + } + set state(encoding) [string tolower $value] + } + + -header { + if {[llength $value] != 2} { + error "-header expects a key and a value, not $value" + } + set lower [string tolower [set mixed [lindex $value 0]]] + if {$lower eq {content-type}} { + error "use -canonical instead of -header $value" + } + if {$lower eq {content-transfer-encoding}} { + error "use -encoding instead of -header $value" + } + if {$lower in {content-md5 mime-version}} { + error {don't go there...} + } + if {$lower ni $state(lowerL)} { + lappend state(lowerL) $lower + lappend state(mixedL) $mixed + } + + set userheader 1 + + array set header $state(header) + lappend header($lower) [lindex $value 1] + set state(header) [array get header] + } + + -file { + set state(file) $value + } + + -parts { + set state(parts) $value + } + + -string { + set state(string) $value + + set state(lines) [split $value \n] + set state(lines.count) [llength $state(lines)] + set state(lines.current) 0 + } + + -root { + # the following are internal options + + set state(root) $value + } + + -offset { + set state(offset) $value + } + + -count { + set state(count) $value + } + + -lineslist { + set state(lines) $value + set state(lines.count) [llength $state(lines)] + set state(lines.current) 0 + #state(string) is needed, but will be built when required + set state(string) {} + } + + default { + error "unknown option $option" + } + } + } + + #We only want one of -file, -parts or -string: + set valueN 0 + foreach value {file parts string} { + if {[info exists state($value)]} { + set state(value) $value + incr valueN + } + } + if {$valueN != 1 && ![info exists state(lines)]} { + error {specify exactly one of -file, -parts, or -string} + } + + if {[set state(canonicalP) [info exists state(content)]]} { + switch -- $state(value) { + file { + set state(offset) 0 + } + + parts { + switch -glob -- $state(content) { + text/* + - + image/* + - + audio/* + - + video/* { + error "-canonical $state(content) and -parts do not mix" + } + + default { + if {$state(encoding) ne {}} { + error {-encoding and -parts do not mix} + } + } + } + } + default {# Go ahead} + } + + if {[lsearch -exact $state(lowerL) content-id] < 0} { + lappend state(lowerL) content-id + lappend state(mixedL) Content-ID + + array set header $state(header) + lappend header(content-id) [uniqueID] + set state(header) [array get header] + } + + set state(version) 1.0 + return + } + + if {$state(params) ne {}} { + error {-param requires -canonical} + } + if {$state(encoding) ne {}} { + error {-encoding requires -canonical} + } + if {$userheader} { + error {-header requires -canonical} + } + if {[info exists state(parts)]} { + error {-parts requires -canonical} + } + + if {[set fileP [info exists state(file)]]} { + if {[set openP [info exists state(root)]]} { + # FRINK: nocheck + variable $state(root) + upvar 0 $state(root) root + + set state(fd) $root(fd) + } else { + set state(root) $token + set state(fd) [open $state(file) RDONLY] + set state(offset) 0 + seek $state(fd) 0 end + set state(count) [tell $state(fd)] + + fconfigure $state(fd) -translation binary + } + } + + set code [catch {mime::parsepart $token} result] + set ecode $errorCode + set einfo $errorInfo + + if {$fileP} { + if {!$openP} { + unset state(root) + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parsepart -- +# +# Parses the MIME headers and attempts to break up the message +# into its various parts, creating a MIME token for each part. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Throws an error if it has problems parsing the MIME token, +# otherwise it just sets up the appropriate variables. + +proc ::mime::parsepart {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[set fileP [info exists state(file)]]} { + seek $state(fd) [set pos $state(offset)] start + set last [expr {$state(offset) + $state(count) - 1}] + } else { + set string $state(string) + } + + set vline {} + while 1 { + set blankP 0 + if {$fileP} { + if {($pos > $last) || ([set x [gets $state(fd) line]] <= 0)} { + set blankP 1 + } else { + incr pos [expr {$x + 1}] + } + } else { + if {$state(lines.current) >= $state(lines.count)} { + set blankP 1 + set line {} + } else { + set line [lindex $state(lines) $state(lines.current)] + incr state(lines.current) + set x [string length $line] + if {$x == 0} {set blankP 1} + } + } + + if {!$blankP && [string match *\r $line]} { + set line [string range $line 0 $x-2]] + if {$x == 1} { + set blankP 1 + } + } + + if {!$blankP && ( + [string first { } $line] == 0 + || + [string first \t $line] == 0 + )} { + append vline \n $line + continue + } + + if {$vline eq {}} { + if {$blankP} { + break + } + + set vline $line + continue + } + + if { + [set x [string first : $vline]] <= 0 + || + [set mixed [string trimright [ + string range $vline 0 [expr {$x - 1}]]]] eq {} + } { + error "improper line in header: $vline" + } + set value [string trim [string range $vline [expr {$x + 1}] end]] + switch -- [set lower [string tolower $mixed]] { + content-type { + if {[info exists state(content)]} { + error "multiple Content-Type fields starting with $vline" + } + + if {![catch {set x [parsetype $token $value]}]} { + set state(content) [lindex $x 0] + set state(params) [lindex $x 1] + } + } + + content-md5 { + } + + content-transfer-encoding { + if { + $state(encoding) ne {} + && + $state(encoding) ne [string tolower $value] + } { + error "multiple Content-Transfer-Encoding fields starting with $vline" + } + + set state(encoding) [string tolower $value] + } + + mime-version { + set state(version) $value + } + + default { + if {[lsearch -exact $state(lowerL) $lower] < 0} { + lappend state(lowerL) $lower + lappend state(mixedL) $mixed + } + + array set header $state(header) + lappend header($lower) $value + set state(header) [array get header] + } + } + + if {$blankP} { + break + } + set vline $line + } + + if {![info exists state(content)]} { + set state(content) text/plain + set state(params) [list charset us-ascii] + } + + if {![string match multipart/* $state(content)]} { + if {$fileP} { + set x [tell $state(fd)] + incr state(count) [expr {$state(offset) - $x}] + set state(offset) $x + } else { + # rebuild string, this is cheap and needed by other functions + set state(string) [join [ + lrange $state(lines) $state(lines.current) end] \n] + } + + if {[string match message/* $state(content)]} { + # FRINK: nocheck + variable [set child $token-[incr state(cid)]] + + set state(value) parts + set state(parts) $child + if {$fileP} { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $state(offset) -count $state(count) + } else { + if {[info exists state(encoding)]} { + set strng [join [ + lrange $state(lines) $state(lines.current) end] \n] + switch -- $state(encoding) { + base64 - + quoted-printable { + set strng [$state(encoding) -mode decode -- $strng] + } + default {} + } + mime::initializeaux $child -string $strng + } else { + mime::initializeaux $child -lineslist [ + lrange $state(lines) $state(lines.current) end] + } + } + } + + return + } + + set state(value) parts + + set boundary {} + foreach {k v} $state(params) { + if {$k eq {boundary}} { + set boundary $v + break + } + } + if {$boundary eq {}} { + error "boundary parameter is missing in $state(content)" + } + if {[string trim $boundary] eq {}} { + error "boundary parameter is empty in $state(content)" + } + + if {$fileP} { + set pos [tell $state(fd)] + # This variable is like 'start', for the reasons laid out + # below, in the other branch of this conditional. + set initialpos $pos + } else { + # This variable is like 'start', a list of lines in the + # part. This record is made even before we find a starting + # boundary and used if we run into the terminating boundary + # before a starting boundary was found. In that case the lines + # before the terminator as recorded by tracelines are seen as + # the part, or at least we attempt to parse them as a + # part. See the forceoctet and nochild flags later. We cannot + # use 'start' as that records lines only after the starting + # boundary was found. + set tracelines [list] + } + + set inP 0 + set moreP 1 + set forceoctet 0 + while {$moreP} { + if {$fileP} { + if {$pos > $last} { + # We have run over the end of the part per the outer + # information without finding a terminating boundary. + # We now fake the boundary and force the parser to + # give any new part coming of this a mime-type of + # application/octet-stream regardless of header + # information. + set line "--$boundary--" + set x [string length $line] + set forceoctet 1 + } else { + if {[set x [gets $state(fd) line]] < 0} { + error "end-of-file encountered while parsing $state(content)" + } + } + incr pos [expr {$x + 1}] + } else { + if {$state(lines.current) >= $state(lines.count)} { + error "end-of-string encountered while parsing $state(content)" + } else { + set line [lindex $state(lines) $state(lines.current)] + incr state(lines.current) + set x [string length $line] + } + set x [string length $line] + } + if {[string last \r $line] == $x - 1} { + set line [string range $line 0 [expr {$x - 2}]] + set crlf 2 + } else { + set crlf 1 + } + + if {[string first --$boundary $line] != 0} { + if {$inP && !$fileP} { + lappend start $line + } + continue + } else { + lappend tracelines $line + } + + if {!$inP} { + # Haven't seen the starting boundary yet. Check if the + # current line contains this starting boundary. + + if {$line eq "--$boundary"} { + # Yes. Switch parser state to now search for the + # terminating boundary of the part and record where + # the part begins (or initialize the recorder for the + # lines in the part). + set inP 1 + if {$fileP} { + set start $pos + } else { + set start [list] + } + continue + } elseif {$line eq "--$boundary--"} { + # We just saw a terminating boundary before we ever + # saw the starting boundary of a part. This forces us + # to stop parsing, we do this by forcing the parser + # into an accepting state. We will try to create a + # child part based on faked start position or recorded + # lines, or, if that fails, let the current part have + # no children. + + # As an example note the test case mime-3.7 and the + # referenced file "badmail1.txt". + + set inP 1 + if {$fileP} { + set start $initialpos + } else { + set start $tracelines + } + set forceoctet 1 + # Fall through. This brings to the creation of the new + # part instead of searching further and possible + # running over the end. + } else { + continue + } + } + + # Looking for the end of the current part. We accept both a + # terminating boundary and the starting boundary of the next + # part as the end of the current part. + + if {[set moreP [string compare $line --$boundary--]] + && $line ne "--$boundary"} { + + # The current part has not ended, so we record the line + # if we are inside a part and doing string parsing. + if {$inP && !$fileP} { + lappend start $line + } + continue + } + + # The current part has ended. We now determine the exact + # boundaries, create a mime part object for it and recursively + # parse it deeper as part of that action. + + # FRINK: nocheck + variable [set child $token-[incr state(cid)]] + + lappend state(parts) $child + + set nochild 0 + if {$fileP} { + if {[set count [expr {$pos - ($start + $x + $crlf + 1)}]] < 0} { + set count 0 + } + if {$forceoctet} { + set ::errorInfo {} + if {[catch { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $start -count $count + }]} { + set nochild 1 + set state(parts) [lrange $state(parts) 0 end-1] + } } else { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $start -count $count + } + seek $state(fd) [set start $pos] start + } else { + if {$forceoctet} { + if {[catch { + mime::initializeaux $child -lineslist $start + }]} { + set nochild 1 + set state(parts) [lrange $state(parts) 0 end-1] + } + } else { + mime::initializeaux $child -lineslist $start + } + set start {} + } + if {$forceoctet && !$nochild} { + variable $child + upvar 0 $child childstate + set childstate(content) application/octet-stream + } + set forceoctet 0 + } +} + +# ::mime::parsetype -- +# +# Parses the string passed in and identifies the content-type and +# params strings. +# +# Arguments: +# token The MIME token to parse. +# string The content-type string that should be parsed. +# +# Results: +# Returns the content and params for the string as a two element +# tcl list. + +proc ::mime::parsetype {token string} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + variable typetokenL + variable typelexemeL + + set state(input) $string + set state(buffer) {} + set state(lastC) LX_END + set state(comment) {} + set state(tokenL) $typetokenL + set state(lexemeL) $typelexemeL + + set code [catch {mime::parsetypeaux $token $string} result] + set ecode $errorCode + set einfo $errorInfo + + unset {*}{ + state(input) + state(buffer) + state(lastC) + state(comment) + state(tokenL) + state(lexemeL) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parsetypeaux -- +# +# A helper function for mime::parsetype. Parses the specified +# string looking for the content type and params. +# +# Arguments: +# token The MIME token to parse. +# string The content-type string that should be parsed. +# +# Results: +# Returns the content and params for the string as a two element +# tcl list. + +proc ::mime::parsetypeaux {token string} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[parselexeme $token] ne {LX_ATOM}} { + error [format {expecting type (found %s)} $state(buffer)] + } + set type [string tolower $state(buffer)] + + switch -- [parselexeme $token] { + LX_SOLIDUS { + } + + LX_END { + if {$type ne {message}} { + error "expecting type/subtype (found $type)" + } + + return [list message/rfc822 {}] + } + + default { + error [format "expecting \"/\" (found %s)" $state(buffer)] + } + } + + if {[parselexeme $token] ne {LX_ATOM}} { + error [format "expecting subtype (found %s)" $state(buffer)] + } + append type [string tolower /$state(buffer)] + + array set params {} + while 1 { + switch -- [parselexeme $token] { + LX_END { + return [list $type [array get params]] + } + + LX_SEMICOLON { + } + + default { + error [format "expecting \";\" (found %s)" $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_END { + return [list $type [array get params]] + } + + LX_ATOM { + } + + default { + error [format "expecting attribute (found %s)" $state(buffer)] + } + } + + set attribute [string tolower $state(buffer)] + + if {[parselexeme $token] ne {LX_EQUALS}} { + error [format {expecting "=" (found %s)} $state(buffer)] + } + + switch -- [parselexeme $token] { + LX_ATOM { + } + + LX_QSTRING { + set state(buffer) [ + string range $state(buffer) 1 [ + expr {[string length $state(buffer)] - 2}]] + } + + default { + error [format {expecting value (found %s)} $state(buffer)] + } + } + set params($attribute) $state(buffer) + } +} + +# ::mime::finalize -- +# +# mime::finalize destroys a MIME part. +# +# If the -subordinates option is present, it specifies which +# subordinates should also be destroyed. The default value is +# "dynamic". +# +# Arguments: +# token The MIME token to parse. +# args Args can be optionally be of the following form: +# ?-subordinates "all" | "dynamic" | "none"? +# +# Results: +# Returns an empty string. + +proc ::mime::finalize {token args} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set options [list -subordinates dynamic] + array set options $args + + switch -- $options(-subordinates) { + all { + #TODO: this code path is untested + if {$state(value) eq {parts}} { + foreach part $state(parts) { + eval [linsert $args 0 mime::finalize $part] + } + } + } + + dynamic { + for {set cid $state(cid)} {$cid > 0} {incr cid -1} { + eval [linsert $args 0 mime::finalize $token-$cid] + } + } + + none { + } + + default { + error "unknown value for -subordinates $options(-subordinates)" + } + } + + foreach name [array names state] { + unset state($name) + } + # FRINK: nocheck + unset $token +} + +# ::mime::getproperty -- +# +# mime::getproperty returns the properties of a MIME part. +# +# The properties are: +# +# property value +# ======== ===== +# content the type/subtype describing the content +# encoding the "Content-Transfer-Encoding" +# params a list of "Content-Type" parameters +# parts a list of tokens for the part's subordinates +# size the approximate size of the content (unencoded) +# +# The "parts" property is present only if the MIME part has +# subordinates. +# +# If mime::getproperty is invoked with the name of a specific +# property, then the corresponding value is returned; instead, if +# -names is specified, a list of all properties is returned; +# otherwise, a dictionary of properties is returned. +# +# Arguments: +# token The MIME token to parse. +# property One of 'content', 'encoding', 'params', 'parts', and +# 'size'. Defaults to returning a dictionary of +# properties. +# +# Results: +# Returns the properties of a MIME part + +proc ::mime::getproperty {token {property {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $property { + {} { + array set properties [list content $state(content) \ + encoding $state(encoding) \ + params $state(params) \ + size [getsize $token]] + if {[info exists state(parts)]} { + set properties(parts) $state(parts) + } + + return [array get properties] + } + + -names { + set names [list content encoding params] + if {[info exists state(parts)]} { + lappend names parts + } + + return $names + } + + content + - + encoding + - + params { + return $state($property) + } + + parts { + if {![info exists state(parts)]} { + error {MIME part is a leaf} + } + + return $state(parts) + } + + size { + return [getsize $token] + } + + default { + error "unknown property $property" + } + } +} + +# ::mime::getsize -- +# +# Determine the size (in bytes) of a MIME part/token +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Returns the size in bytes of the MIME token. + +proc ::mime::getsize {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $state(value)/$state(canonicalP) { + file/0 { + set size $state(count) + } + + file/1 { + return [file size $state(file)] + } + + parts/0 + - + parts/1 { + set size 0 + foreach part $state(parts) { + incr size [getsize $part] + } + + return $size + } + + string/0 { + set size [string length $state(string)] + } + + string/1 { + return [string length $state(string)] + } + default { + error "Unknown combination \"$state(value)/$state(canonicalP)\"" + } + } + + if {$state(encoding) eq {base64}} { + set size [expr {($size * 3 + 2) / 4}] + } + + return $size +} + + +proc ::mime::getContentType token { + variable $token + upvar 0 $token state + set boundary {} + set res $state(content) + foreach {k v} $state(params) { + set boundary $v + append res ";\n $k=\"$v\"" + } + if {([string match multipart/* $state(content)]) \ + && ($boundary eq {})} { + # we're doing everything in one pass... + set key [clock seconds]$token[info hostname][array get state] + set seqno 8 + while {[incr seqno -1] >= 0} { + set key [md5 -- $key] + } + set boundary "----- =_[string trim [base64 -mode encode -- $key]]" + + append res ";\n boundary=\"$boundary\"" + } + return $res +} + +# ::mime::getheader -- +# +# mime::getheader returns the header of a MIME part. +# +# A header consists of zero or more key/value pairs. Each value is a +# list containing one or more strings. +# +# If mime::getheader is invoked with the name of a specific key, then +# a list containing the corresponding value(s) is returned; instead, +# if -names is specified, a list of all keys is returned; otherwise, a +# dictionary is returned. Note that when a +# key is specified (e.g., "Subject"), the list returned usually +# contains exactly one string; however, some keys (e.g., "Received") +# often occur more than once in the header, accordingly the list +# returned usually contains more than one string. +# +# Arguments: +# token The MIME token to parse. +# key Either a key or '-names'. If it is '-names' a list +# of all keys is returned. +# +# Results: +# Returns the header of a MIME part. + +proc ::mime::getheader {token {key {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set header $state(header) + switch -- $key { + {} { + set result {} + lappend result MIME-Version $state(version) + foreach lower $state(lowerL) mixed $state(mixedL) { + foreach value $header($lower) { + lappend result $mixed $value + } + } + set tencoding [getTransferEncoding $token] + if {$tencoding ne {}} { + lappend result Content-Transfer-Encoding $tencoding + } + lappend result Content-Type [getContentType $token] + return $result + } + + -names { + return $state(mixedL) + } + + default { + set lower [string tolower $key] + + switch $lower { + content-transfer-encoding { + return [getTransferEncoding $token] + } + content-type { + return [list [getContentType $token]] + } + mime-version { + return [list $state(version)] + } + default { + if {![info exists header($lower)]} { + error "key $key not in header" + } + return $header($lower) + } + } + } + } +} + + +proc ::mime::getTransferEncoding token { + variable $token + upvar 0 $token state + set res {} + if {[set encoding $state(encoding)] eq {}} { + set encoding [encoding $token] + } + if {$encoding ne {}} { + set res $encoding + } + switch -- $encoding { + base64 + - + quoted-printable { + set converter $encoding + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088], also [#539952] + # Go ahead + } + default { + error "Can't handle content encoding \"$encoding\"" + } + } + return $res +} + +# ::mime::setheader -- +# +# mime::setheader writes, appends to, or deletes the value associated +# with a key in the header. +# +# The value for -mode is one of: +# +# write: the key/value is either created or overwritten (the +# default); +# +# append: a new value is appended for the key (creating it as +# necessary); or, +# +# delete: all values associated with the key are removed (the +# "value" parameter is ignored). +# +# Regardless, mime::setheader returns the previous value associated +# with the key. +# +# Arguments: +# token The MIME token to parse. +# key The name of the key whose value should be set. +# value The value for the header key to be set to. +# args An optional argument of the form: +# ?-mode "write" | "append" | "delete"? +# +# Results: +# Returns previous value associated with the specified key. + +proc ::mime::setheader {token key value args} { + # FRINK: nocheck + variable internal + variable $token + upvar 0 $token state + + array set options [list -mode write] + array set options $args + + set lower [string tolower $key] + array set header $state(header) + if {[set x [lsearch -exact $state(lowerL) $lower]] < 0} { + #TODO: this code path is not tested + if {$options(-mode) eq {delete}} { + error "key $key not in header" + } + + lappend state(lowerL) $lower + lappend state(mixedL) $key + + set result {} + } else { + set result $header($lower) + } + switch -- $options(-mode) { + append - write { + if {!$internal} { + switch -- $lower { + content-md5 + - + content-type + - + content-transfer-encoding + - + mime-version { + set values [getheader $token $lower] + if {$value ni $values} { + error "key $key may not be set" + } + } + default {# Skip key} + } + } + switch -- $options(-mode) { + append { + lappend header($lower) $value + } + write { + set header($lower) [list $value] + } + } + } + delete { + unset header($lower) + set state(lowerL) [lreplace $state(lowerL) $x $x] + set state(mixedL) [lreplace $state(mixedL) $x $x] + } + + default { + error "unknown value for -mode $options(-mode)" + } + } + + set state(header) [array get header] + return $result +} + +# ::mime::getbody -- +# +# mime::getbody returns the body of a leaf MIME part in canonical form. +# +# If the -command option is present, then it is repeatedly invoked +# with a fragment of the body as this: +# +# uplevel #0 $callback [list "data" $fragment] +# +# (The -blocksize option, if present, specifies the maximum size of +# each fragment passed to the callback.) +# When the end of the body is reached, the callback is invoked as: +# +# uplevel #0 $callback "end" +# +# Alternatively, if an error occurs, the callback is invoked as: +# +# uplevel #0 $callback [list "error" reason] +# +# Regardless, the return value of the final invocation of the callback +# is propagated upwards by mime::getbody. +# +# If the -command option is absent, then the return value of +# mime::getbody is a string containing the MIME part's entire body. +# +# Arguments: +# token The MIME token to parse. +# args Optional arguments of the form: +# ?-decode? ?-command callback ?-blocksize octets? ? +# +# Results: +# Returns a string containing the MIME part's entire body, or +# if '-command' is specified, the return value of the command +# is returned. + +proc ::mime::getbody {token args} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set decode 0 + if {[set pos [lsearch -exact $args -decode]] >= 0} { + set decode 1 + set args [lreplace $args $pos $pos] + } + + array set options [list -command [ + list mime::getbodyaux $token] -blocksize 4096] + array set options $args + if {$options(-blocksize) < 1} { + error "-blocksize expects a positive integer, not $options(-blocksize)" + } + + set code 0 + set ecode {} + set einfo {} + + switch -- $state(value)/$state(canonicalP) { + file/0 { + set fd [open $state(file) RDONLY] + + set code [catch { + fconfigure $fd -translation binary + seek $fd [set pos $state(offset)] start + set last [expr {$state(offset) + $state(count) - 1}] + + set fragment {} + while {$pos <= $last} { + if {[set cc [ + expr {($last - $pos) + 1}]] > $options(-blocksize)} { + set cc $options(-blocksize) + } + incr pos [set len [ + string length [set chunk [read $fd $cc]]]] + switch -exact -- $state(encoding) { + base64 + - + quoted-printable { + if {([set x [string last \n $chunk]] > 0) \ + && ($x + 1 != $len)} { + set chunk [string range $chunk 0 $x] + seek $fd [incr pos [expr {($x + 1) - $len}]] start + } + set chunk [ + $state(encoding) -mode decode -- $chunk] + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088] + # Go ahead, leave chunk alone + } + default { + error "Can't handle content encoding \"$state(encoding)\"" + } + } + append fragment $chunk + + set cc [expr {$options(-blocksize) - 1}] + while {[string length $fragment] > $options(-blocksize)} { + uplevel #0 $options(-command) [ + list data [string range $fragment 0 $cc]] + + set fragment [ + string range $fragment $options(-blocksize) end] + } + } + if {[string length $fragment] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + catch {close $fd} + } + + file/1 { + set fd [open $state(file) RDONLY] + + set code [catch { + fconfigure $fd -translation binary + + while {[string length [ + set fragment [read $fd $options(-blocksize)]]] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + catch {close $fd} + } + + parts/0 + - + parts/1 { + error {MIME part isn't a leaf} + } + + string/0 + - + string/1 { + switch -- $state(encoding)/$state(canonicalP) { + base64/0 + - + quoted-printable/0 { + set fragment [ + $state(encoding) -mode decode -- $state(string)] + } + + default { + # Not a bugfix for [#477088], but clarification + # This handles no-encoding, 7bit, 8bit, and binary. + set fragment $state(string) + } + } + + set code [catch { + set cc [expr {$options(-blocksize) -1}] + while {[string length $fragment] > $options(-blocksize)} { + uplevel #0 $options(-command) [ + list data [string range $fragment 0 $cc]] + + set fragment [ + string range $fragment $options(-blocksize) end] + } + if {[string length $fragment] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + } + default { + error "Unknown combination \"$state(value)/$state(canonicalP)\"" + } + } + + set code [catch { + if {$code} { + uplevel #0 $options(-command) [list error $result] + } else { + uplevel #0 $options(-command) [list end] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + if {$code} { + return -code $code -errorinfo $einfo -errorcode $ecode $result + } + + if {$decode} { + array set params [mime::getproperty $token params] + + if {[info exists params(charset)]} { + set charset $params(charset) + } else { + set charset US-ASCII + } + + set enc [reversemapencoding $charset] + if {$enc ne {}} { + set result [::encoding convertfrom $enc $result] + } else { + return -code error "-decode failed: can't reversemap charset $charset" + } + } + + return $result +} + +# ::mime::getbodyaux -- +# +# Builds up the body of the message, fragment by fragment. When +# the entire message has been retrieved, it is returned. +# +# Arguments: +# token The MIME token to parse. +# reason One of 'data', 'end', or 'error'. +# fragment The section of data data fragment to extract a +# string from. +# +# Results: +# Returns nothing, except when called with the 'end' argument +# in which case it returns a string that contains all of the +# data that 'getbodyaux' has been called with. Will throw an +# error if it is called with the reason of 'error'. + +proc ::mime::getbodyaux {token reason {fragment {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch $reason { + data { + append state(getbody) $fragment + return {} + } + + end { + if {[info exists state(getbody)]} { + set result $state(getbody) + unset state(getbody) + } else { + set result {} + } + + return $result + } + + error { + catch {unset state(getbody)} + error $reason + } + + default { + error "Unknown reason \"$reason\"" + } + } +} + +# ::mime::copymessage -- +# +# mime::copymessage copies the MIME part to the specified channel. +# +# mime::copymessage operates synchronously, and uses fileevent to +# allow asynchronous operations to proceed independently. +# +# Arguments: +# token The MIME token to parse. +# channel The channel to copy the message to. +# +# Results: +# Returns nothing unless an error is thrown while the message +# is being written to the channel. + +proc ::mime::copymessage {token channel} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set openP [info exists state(fd)] + + set code [catch {mime::copymessageaux $token $channel} result] + set ecode $errorCode + set einfo $errorInfo + + if {!$openP && [info exists state(fd)]} { + if {![info exists state(root)]} { + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::copymessageaux -- +# +# mime::copymessageaux copies the MIME part to the specified channel. +# +# Arguments: +# token The MIME token to parse. +# channel The channel to copy the message to. +# +# Results: +# Returns nothing unless an error is thrown while the message +# is being written to the channel. + +proc ::mime::copymessageaux {token channel} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set header $state(header) + + set boundary {} + + set result {} + foreach {mixed value} [getheader $token] { + puts $channel "$mixed: $value" + } + + foreach {k v} $state(params) { + if {$k eq {boundary}} { + set boundary $v + } + } + + set converter {} + set encoding {} + if {$state(value) ne {parts}} { + if {$state(canonicalP)} { + if {[set encoding $state(encoding)] eq {}} { + set encoding [encoding $token] + } + if {$encoding ne {}} { + puts $channel "Content-Transfer-Encoding: $encoding" + } + switch -- $encoding { + base64 + - + quoted-printable { + set converter $encoding + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088], also [#539952] + # Go ahead + } + default { + error "Can't handle content encoding \"$encoding\"" + } + } + } + } elseif {([string match multipart/* $state(content)]) \ + && ($boundary eq {})} { + # we're doing everything in one pass... + set key [clock seconds]$token[info hostname][array get state] + set seqno 8 + while {[incr seqno -1] >= 0} { + set key [md5 -- $key] + } + set boundary "----- =_[string trim [base64 -mode encode -- $key]]" + + puts $channel ";\n boundary=\"$boundary\"" + } + + if {[info exists state(error)]} { + unset state(error) + } + + switch -- $state(value) { + file { + set closeP 1 + if {[info exists state(root)]} { + # FRINK: nocheck + variable $state(root) + upvar 0 $state(root) root + + if {[info exists root(fd)]} { + set fd $root(fd) + set closeP 0 + } else { + set fd [set state(fd) [open $state(file) RDONLY]] + } + set size $state(count) + } else { + set fd [set state(fd) [open $state(file) RDONLY]] + # read until eof + set size -1 + } + seek $fd $state(offset) start + if {$closeP} { + fconfigure $fd -translation binary + } + + puts $channel {} + + while {$size != 0 && ![eof $fd]} { + if {$size < 0 || $size > 32766} { + set X [read $fd 32766] + } else { + set X [read $fd $size] + } + if {$size > 0} { + set size [expr {$size - [string length $X]}] + } + if {$converter eq {}} { + puts -nonewline $channel $X + } else { + puts -nonewline $channel [$converter -mode encode -- $X] + } + } + + if {$closeP} { + catch {close $state(fd)} + unset state(fd) + } + } + + parts { + if { + ![info exists state(root)] + && + [info exists state(file)] + } { + set state(fd) [open $state(file) RDONLY] + fconfigure $state(fd) -translation binary + } + + switch -glob -- $state(content) { + message/* { + puts $channel {} + foreach part $state(parts) { + mime::copymessage $part $channel + break + } + } + + default { + # Note RFC 2046: See buildmessageaux for details. + # + # The boundary delimiter MUST occur at the + # beginning of a line, i.e., following a CRLF, and + # the initial CRLF is considered to be attached to + # the boundary delimiter line rather than part of + # the preceding part. + # + # - The above means that the CRLF before $boundary + # is needed per the RFC, and the parts must not + # have a closing CRLF of their own. See Tcllib bug + # 1213527, and patch 1254934 for the problems when + # both file/string branches added CRLF after the + # body parts. + + + foreach part $state(parts) { + puts $channel \n--$boundary + mime::copymessage $part $channel + } + puts $channel \n--$boundary-- + } + } + + if {[info exists state(fd)]} { + catch {close $state(fd)} + unset state(fd) + } + } + + string { + if {[catch {fconfigure $channel -buffersize} blocksize]} { + set blocksize 4096 + } elseif {$blocksize < 512} { + set blocksize 512 + } + set blocksize [expr {($blocksize / 4) * 3}] + + # [893516] + fconfigure $channel -buffersize $blocksize + + puts $channel {} + + #TODO: tests don't cover these paths + if {$converter eq {}} { + puts -nonewline $channel $state(string) + } else { + puts -nonewline $channel [$converter -mode encode -- $state(string)] + } + } + default { + error "Unknown value \"$state(value)\"" + } + } + + flush $channel + + if {[info exists state(error)]} { + error $state(error) + } +} + +# ::mime::buildmessage -- +# +# Like copymessage, but produces a string rather than writing the message into a channel. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# The message. + +proc ::mime::buildmessage token { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set openP [info exists state(fd)] + + set code [catch {mime::buildmessageaux $token} result] + if {![info exists errorCode]} { + set ecode {} + } else { + set ecode $errorCode + } + set einfo $errorInfo + + if {!$openP && [info exists state(fd)]} { + if {![info exists state(root)]} { + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + + +proc ::mime::buildmessageaux token { + set chan [tcl::chan::memchan] + chan configure $chan -translation crlf + copymessageaux $token $chan + seek $chan 0 + chan configure $chan -translation binary + set res [read $chan] + close $chan + return $res +} + +# ::mime::encoding -- +# +# Determines how a token is encoded. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Returns the encoding of the message (the null string, base64, +# or quoted-printable). + +proc ::mime::encoding {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -glob -- $state(content) { + audio/* + - + image/* + - + video/* { + return base64 + } + + message/* + - + multipart/* { + return {} + } + default {# Skip} + } + + set asciiP 1 + set lineP 1 + switch -- $state(value) { + file { + set fd [open $state(file) RDONLY] + fconfigure $fd -translation binary + + while {[gets $fd line] >= 0} { + if {$asciiP} { + set asciiP [encodingasciiP $line] + } + if {$lineP} { + set lineP [encodinglineP $line] + } + if {(!$asciiP) && (!$lineP)} { + break + } + } + + catch {close $fd} + } + + parts { + return {} + } + + string { + foreach line [split $state(string) "\n"] { + if {$asciiP} { + set asciiP [encodingasciiP $line] + } + if {$lineP} { + set lineP [encodinglineP $line] + } + if {(!$asciiP) && (!$lineP)} { + break + } + } + } + default { + error "Unknown value \"$state(value)\"" + } + } + + switch -glob -- $state(content) { + text/* { + if {!$asciiP} { + #TODO: this path is not covered by tests + foreach {k v} $state(params) { + if {$k eq "charset"} { + set v [string tolower $v] + if {($v ne "us-ascii") \ + && (![string match {iso-8859-[1-8]} $v])} { + return base64 + } + + break + } + } + } + + if {!$lineP} { + return quoted-printable + } + } + + + default { + if {(!$asciiP) || (!$lineP)} { + return base64 + } + } + } + + return {} +} + +# ::mime::encodingasciiP -- +# +# Checks if a string is a pure ascii string, or if it has a non-standard +# form. +# +# Arguments: +# line The line to check. +# +# Results: +# Returns 1 if \r only occurs at the end of lines, and if all +# characters in the line are between the ASCII codes of 32 and 126. + +proc ::mime::encodingasciiP {line} { + foreach c [split $line {}] { + switch -- $c { + { } - \t - \r - \n { + } + + default { + binary scan $c c c + if {($c < 32) || ($c > 126)} { + return 0 + } + } + } + } + if { + [set r [string first \r $line]] < 0 + || + $r == {[string length $line] - 1} + } { + return 1 + } + + return 0 +} + +# ::mime::encodinglineP -- +# +# Checks if a string is a line is valid to be processed. +# +# Arguments: +# line The line to check. +# +# Results: +# Returns 1 the line is less than 76 characters long, the line +# contains more characters than just whitespace, the line does +# not start with a '.', and the line does not start with 'From '. + +proc ::mime::encodinglineP {line} { + if {([string length $line] > 76) \ + || ($line ne [string trimright $line]) \ + || ([string first . $line] == 0) \ + || ([string first {From } $line] == 0)} { + return 0 + } + + return 1 +} + +# ::mime::fcopy -- +# +# Appears to be unused. +# +# Arguments: +# +# Results: +# + +proc ::mime::fcopy {token count {error {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {$error ne {}} { + set state(error) $error + } + set state(doneP) 1 +} + +# ::mime::scopy -- +# +# Copy a portion of the contents of a mime token to a channel. +# +# Arguments: +# token The token containing the data to copy. +# channel The channel to write the data to. +# offset The location in the string to start copying +# from. +# len The amount of data to write. +# blocksize The block size for the write operation. +# +# Results: +# The specified portion of the string in the mime token is +# copied to the specified channel. + +proc ::mime::scopy {token channel offset len blocksize} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {$len <= 0} { + set state(doneP) 1 + fileevent $channel writable {} + return + } + + if {[set cc $len] > $blocksize} { + set cc $blocksize + } + + if {[catch { + puts -nonewline $channel [ + string range $state(string) $offset [expr {$offset + $cc - 1}]] + fileevent $channel writable [ + list mime::scopy $token $channel [ + incr offset $cc] [incr len -$cc] $blocksize] + } result] + } { + set state(error) $result + set state(doneP) 1 + fileevent $channel writable {} + } + return +} + +# ::mime::qp_encode -- +# +# Tcl version of quote-printable encode +# +# Arguments: +# string The string to quote. +# encoded_word Boolean value to determine whether or not encoded words +# (RFC 2047) should be handled or not. (optional) +# +# Results: +# The properly quoted string is returned. + +proc ::mime::qp_encode {string {encoded_word 0} {no_softbreak 0}} { + # 8.1+ improved string manipulation routines used. + # Replace outlying characters, characters that would normally + # be munged by EBCDIC gateways, and special Tcl characters "[\]{} + # with =xx sequence + + if {$encoded_word} { + # Special processing for encoded words (RFC 2047) + set regexp {[\x00-\x08\x0B-\x1E\x21-\x24\x3D\x40\x5B-\x5E\x60\x7B-\xFF\x09\x5F\x3F]} + lappend mapChars { } _ + } else { + set regexp {[\x00-\x08\x0B-\x1E\x21-\x24\x3D\x40\x5B-\x5E\x60\x7B-\xFF]} + } + regsub -all -- $regexp $string {[format =%02X [scan "\\&" %c]]} string + + # Replace the format commands with their result + + set string [subst -novariables $string] + + # soft/hard newlines and other + # Funky cases for SMTP compatibility + lappend mapChars " \n" =20\n \t\n =09\n \n\.\n =2E\n "\nFrom " "\n=46rom " + + set string [string map $mapChars $string] + + # Break long lines - ugh + + # Implementation of FR #503336 + if {$no_softbreak} { + set result $string + } else { + set result {} + foreach line [split $string \n] { + while {[string length $line] > 72} { + set chunk [string range $line 0 72] + if {[regexp -- (=|=.)$ $chunk dummy end]} { + + # Don't break in the middle of a code + + set len [expr {72 - [string length $end]}] + set chunk [string range $line 0 $len] + incr len + set line [string range $line $len end] + } else { + set line [string range $line 73 end] + } + append result $chunk=\n + } + append result $line\n + } + + # Trim off last \n, since the above code has the side-effect + # of adding an extra \n to the encoded string and return the + # result. + set result [string range $result 0 end-1] + } + + # If the string ends in space or tab, replace with =xx + + set lastChar [string index $result end] + if {$lastChar eq { }} { + set result [string replace $result end end =20] + } elseif {$lastChar eq "\t"} { + set result [string replace $result end end =09] + } + + return $result +} + +# ::mime::qp_decode -- +# +# Tcl version of quote-printable decode +# +# Arguments: +# string The quoted-printable string to decode. +# encoded_word Boolean value to determine whether or not encoded words +# (RFC 2047) should be handled or not. (optional) +# +# Results: +# The decoded string is returned. + +proc ::mime::qp_decode {string {encoded_word 0}} { + # 8.1+ improved string manipulation routines used. + # Special processing for encoded words (RFC 2047) + + if {$encoded_word} { + # _ == \x20, even if SPACE occupies a different code position + set string [string map [list _ \u0020] $string] + } + + # smash the white-space at the ends of lines since that must've been + # generated by an MUA. + + regsub -all -- {[ \t]+\n} $string \n string + set string [string trimright $string " \t"] + + # Protect the backslash for later subst and + # smash soft newlines, has to occur after white-space smash + # and any encoded word modification. + + #TODO: codepath not tested + set string [string map [list \\ {\\} =\n {}] $string] + + # Decode specials + + regsub -all -nocase {=([a-f0-9][a-f0-9])} $string {\\u00\1} string + + # process \u unicode mapped chars + + return [subst -novariables -nocommands $string] +} + +# ::mime::parseaddress -- +# +# This was originally written circa 1982 in C. we're still using it +# because it recognizes virtually every buggy address syntax ever +# generated! +# +# mime::parseaddress takes a string containing one or more 822-style +# address specifications and returns a list of dictionaries, for each +# address specified in the argument. +# +# Each dictionary contains these properties: +# +# property value +# ======== ===== +# address local@domain +# comment 822-style comment +# domain the domain part (rhs) +# error non-empty on a parse error +# group this address begins a group +# friendly user-friendly rendering +# local the local part (lhs) +# memberP this address belongs to a group +# phrase the phrase part +# proper 822-style address specification +# route 822-style route specification (obsolete) +# +# Note that one or more of these properties may be empty. +# +# Arguments: +# string The address string to parse +# +# Results: +# Returns a list of dictionaries, one element for each address +# specified in the argument. + +proc ::mime::parseaddress {string} { + global errorCode errorInfo + + variable mime + + set token [namespace current]::[incr mime(uid)] + # FRINK: nocheck + variable $token + upvar 0 $token state + + set code [catch {mime::parseaddressaux $token $string} result] + set ecode $errorCode + set einfo $errorInfo + + foreach name [array names state] { + unset state($name) + } + # FRINK: nocheck + catch {unset $token} + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parseaddressaux -- +# +# This was originally written circa 1982 in C. we're still using it +# because it recognizes virtually every buggy address syntax ever +# generated! +# +# mime::parseaddressaux does the actually parsing for mime::parseaddress +# +# Each dictionary contains these properties: +# +# property value +# ======== ===== +# address local@domain +# comment 822-style comment +# domain the domain part (rhs) +# error non-empty on a parse error +# group this address begins a group +# friendly user-friendly rendering +# local the local part (lhs) +# memberP this address belongs to a group +# phrase the phrase part +# proper 822-style address specification +# route 822-style route specification (obsolete) +# +# Note that one or more of these properties may be empty. +# +# Arguments: +# token The MIME token to work from. +# string The address string to parse +# +# Results: +# Returns a list of dictionaries, one for each address specified in the +# argument. + +proc ::mime::parseaddressaux {token string} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + variable addrtokenL + variable addrlexemeL + + set state(input) $string + set state(glevel) 0 + set state(buffer) {} + set state(lastC) LX_END + set state(tokenL) $addrtokenL + set state(lexemeL) $addrlexemeL + + set result {} + while {[addr_next $token]} { + if {[set tail $state(domain)] ne {}} { + set tail @$state(domain) + } else { + set tail @[info hostname] + } + if {[set address $state(local)] ne {}} { + #TODO: this path is not covered by tests + append address $tail + } + + if {$state(phrase) ne {}} { + #TODO: this path is not covered by tests + set state(phrase) [string trim $state(phrase) \"] + foreach t $state(tokenL) { + if {[string first $t $state(phrase)] >= 0} { + #TODO: is this quoting robust enough? + set state(phrase) \"$state(phrase)\" + break + } + } + + set proper "$state(phrase) <$address>" + } else { + set proper $address + } + + if {[set friendly $state(phrase)] eq {}} { + #TODO: this path is not covered by tests + if {[set note $state(comment)] ne {}} { + if {[string first ( $note] == 0} { + set note [string trimleft [string range $note 1 end]] + } + if { + [string last ) $note] + == [set len [expr {[string length $note] - 1}]] + } { + set note [string range $note 0 [expr {$len - 1}]] + } + set friendly $note + } + + if { + $friendly eq {} + && + [set mbox $state(local)] ne {} + } { + #TODO: this path is not covered by tests + set mbox [string trim $mbox \"] + + if {[string first / $mbox] != 0} { + set friendly $mbox + } elseif {[set friendly [addr_x400 $mbox PN]] ne {}} { + } elseif { + [set friendly [addr_x400 $mbox S]] ne {} + && + [set g [addr_x400 $mbox G]] ne {} + } { + set friendly "$g $friendly" + } + + if {$friendly eq {}} { + set friendly $mbox + } + } + } + set friendly [string trim $friendly \"] + + lappend result [list address $address \ + comment $state(comment) \ + domain $state(domain) \ + error $state(error) \ + friendly $friendly \ + group $state(group) \ + local $state(local) \ + memberP $state(memberP) \ + phrase $state(phrase) \ + proper $proper \ + route $state(route)] + + } + + unset {*}{ + state(input) + state(glevel) + state(buffer) + state(lastC) + state(tokenL) + state(lexemeL) + } + + return $result +} + +# ::mime::addr_next -- +# +# Locate the next address in a mime token. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_next {token} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + set nocomplain [package vsatisfies [package provide Tcl] 8.4] + foreach prop {comment domain error group local memberP phrase route} { + if {$nocomplain} { + unset -nocomplain state($prop) + } else { + if {[catch {unset state($prop)}]} {set ::errorInfo {}} + } + } + + switch -- [set code [catch {mime::addr_specification $token} result]] { + 0 { + if {!$result} { + return 0 + } + + switch -- $state(lastC) { + LX_COMMA + - + LX_END { + } + default { + # catch trailing comments... + set lookahead $state(input) + mime::parselexeme $token + set state(input) $lookahead + } + } + } + + 7 { + set state(error) $result + + while {1} { + switch -- $state(lastC) { + LX_COMMA + - + LX_END { + break + } + + default { + mime::parselexeme $token + } + } + } + } + + default { + set ecode $errorCode + set einfo $errorInfo + + return -code $code -errorinfo $einfo -errorcode $ecode $result + } + } + + foreach prop {comment domain error group local memberP phrase route} { + if {![info exists state($prop)]} { + set state($prop) {} + } + } + + return 1 +} + +# ::mime::addr_specification -- +# +# Uses lookahead parsing to determine whether there is another +# valid e-mail address or not. Throws errors if unrecognized +# or invalid e-mail address syntax is used. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_specification {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set lookahead $state(input) + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + set state(phrase) $state(buffer) + } + + LX_SEMICOLON { + if {[incr state(glevel) -1] < 0} { + return -code 7 "extraneous semi-colon" + } + + catch {unset state(comment)} + return [addr_specification $token] + } + + LX_COMMA { + catch {unset state(comment)} + return [addr_specification $token] + } + + LX_END { + return 0 + } + + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_ATSIGN { + set state(input) $lookahead + return [addr_routeaddr $token 0] + } + + default { + return -code 7 [ + format "unexpected character at beginning (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(phrase) " " $state(buffer) + + return [addr_phrase $token] + } + + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_COLON { + return [addr_group $token] + } + + LX_DOT { + set state(local) "$state(phrase)$state(buffer)" + unset state(phrase) + mime::addr_routeaddr $token 0 + mime::addr_end $token + } + + LX_ATSIGN { + set state(memberP) $state(glevel) + set state(local) $state(phrase) + unset state(phrase) + mime::addr_domain $token + mime::addr_end $token + } + + LX_SEMICOLON + - + LX_COMMA + - + LX_END { + set state(memberP) $state(glevel) + if { + $state(lastC) eq "LX_SEMICOLON" + && + ([incr state(glevel) -1] < 0) + } { + #TODO: this path is not covered by tests + return -code 7 "extraneous semi-colon" + } + + set state(local) $state(phrase) + unset state(phrase) + } + + default { + return -code 7 [ + format "expecting mailbox (found %s)" $state(buffer)] + } + } + + return 1 +} + +# ::mime::addr_routeaddr -- +# +# Parses the domain portion of an e-mail address. Finds the '@' +# sign and then calls mime::addr_route to verify the domain. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_routeaddr {token {checkP 1}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set lookahead $state(input) + if {[parselexeme $token] eq "LX_ATSIGN"} { + #TODO: this path is not covered by tests + mime::addr_route $token + } else { + set state(input) $lookahead + } + + mime::addr_local $token + + switch -- $state(lastC) { + LX_ATSIGN { + mime::addr_domain $token + } + + LX_SEMICOLON + - + LX_RBRACKET + - + LX_COMMA + - + LX_END { + } + + default { + return -code 7 [ + format "expecting at-sign after local-part (found %s)" \ + $state(buffer)] + } + } + + if {($checkP) && ($state(lastC) ne "LX_RBRACKET")} { + return -code 7 [ + format "expecting right-bracket (found %s)" $state(buffer)] + } + + return 1 +} + +# ::mime::addr_route -- +# +# Attempts to parse the portion of the e-mail address after the @. +# Tries to verify that the domain definition has a valid form. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_route {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(route) @ + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_DLITERAL { + append state(route) $state(buffer) + } + + default { + return -code 7 \ + [format "expecting sub-route in route-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_COMMA { + append state(route) $state(buffer) + while 1 { + switch -- [parselexeme $token] { + LX_COMMA { + } + + LX_ATSIGN { + append state(route) $state(buffer) + break + } + + default { + return -code 7 \ + [format "expecting at-sign in route (found %s)" \ + $state(buffer)] + } + } + } + } + + LX_ATSIGN + - + LX_DOT { + append state(route) $state(buffer) + } + + LX_COLON { + append state(route) $state(buffer) + return + } + + default { + return -code 7 [ + format "expecting colon to terminate route (found %s)" \ + $state(buffer)] + } + } + } +} + +# ::mime::addr_domain -- +# +# Attempts to parse the portion of the e-mail address after the @. +# Tries to verify that the domain definition has a valid form. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_domain {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_DLITERAL { + append state(domain) $state(buffer) + } + + default { + return -code 7 [ + format "expecting sub-domain in domain-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_DOT { + append state(domain) $state(buffer) + } + + LX_ATSIGN { + append state(local) % $state(domain) + unset state(domain) + } + + default { + return + } + } + } +} + +# ::mime::addr_local -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_local {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(memberP) $state(glevel) + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(local) $state(buffer) + } + + default { + return -code 7 \ + [format "expecting mailbox in local-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_DOT { + append state(local) $state(buffer) + } + + default { + return + } + } + } +} + +# ::mime::addr_phrase -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + + +proc ::mime::addr_phrase {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + while {1} { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(phrase) " " $state(buffer) + } + + default { + break + } + } + } + + switch -- $state(lastC) { + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_COLON { + return [addr_group $token] + } + + LX_DOT { + append state(phrase) $state(buffer) + return [addr_phrase $token] + } + + default { + return -code 7 [ + format "found phrase instead of mailbox (%s%s)" \ + $state(phrase) $state(buffer)] + } + } +} + +# ::mime::addr_group -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_group {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[incr state(glevel)] > 1} { + return -code 7 [ + format "nested groups not allowed (found %s)" $state(phrase)] + } + + set state(group) $state(phrase) + unset state(phrase) + + set lookahead $state(input) + while 1 { + switch -- [parselexeme $token] { + LX_SEMICOLON + - + LX_END { + set state(glevel) 0 + return 1 + } + + LX_COMMA { + } + + default { + set state(input) $lookahead + return [addr_specification $token] + } + } + } +} + +# ::mime::addr_end -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_end {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $state(lastC) { + LX_SEMICOLON { + if {[incr state(glevel) -1] < 0} { + return -code 7 "extraneous semi-colon" + } + } + + LX_COMMA + - + LX_END { + } + + default { + return -code 7 [ + format "junk after local@domain (found %s)" $state(buffer)] + } + } +} + +# ::mime::addr_x400 -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_x400 {mbox key} { + if {[set x [string first /$key= [string toupper $mbox]]] < 0} { + return {} + } + set mbox [string range $mbox [expr {$x + [string length $key] + 2}] end] + + if {[set x [string first / $mbox]] > 0} { + set mbox [string range $mbox 0 [expr {$x - 1}]] + } + + return [string trim $mbox \"] +} + +# ::mime::parsedatetime -- +# +# Fortunately the clock command in the Tcl 8.x core does all the heavy +# lifting for us (except for timezone calculations). +# +# mime::parsedatetime takes a string containing an 822-style date-time +# specification and returns the specified property. +# +# The list of properties and their ranges are: +# +# property range +# ======== ===== +# clock raw result of "clock scan" +# hour 0 .. 23 +# lmonth January, February, ..., December +# lweekday Sunday, Monday, ... Saturday +# mday 1 .. 31 +# min 0 .. 59 +# mon 1 .. 12 +# month Jan, Feb, ..., Dec +# proper 822-style date-time specification +# rclock elapsed seconds between then and now +# sec 0 .. 59 +# wday 0 .. 6 (Sun .. Mon) +# weekday Sun, Mon, ..., Sat +# yday 1 .. 366 +# year 1900 ... +# zone -720 .. 720 (minutes east of GMT) +# +# Arguments: +# value Either a 822-style date-time specification or '-now' +# if the current date/time should be used. +# property The property (from the list above) to return +# +# Results: +# Returns the string value of the 'property' for the date/time that was +# specified in 'value'. + +namespace eval ::mime { + variable WDAYS_SHORT [list Sun Mon Tue Wed Thu Fri Sat] + variable WDAYS_LONG [list Sunday Monday Tuesday Wednesday Thursday \ + Friday Saturday] + + # Counting months starts at 1, so just insert a dummy element + # at index 0. + variable MONTHS_SHORT [list {} \ + Jan Feb Mar Apr May Jun \ + Jul Aug Sep Oct Nov Dec] + variable MONTHS_LONG [list {} \ + January February March April May June July \ + August Sepember October November December] +} +proc ::mime::parsedatetime {value property} { + if {$value eq "-now"} { + set clock [clock seconds] + } elseif {[regexp {^(.*) ([+-])([0-9][0-9])([0-9][0-9])$} $value \ + -> value zone_sign zone_hour zone_min] + } { + set clock [clock scan $value -gmt 1] + if {[info exists zone_min]} { + set zone_min [scan $zone_min %d] + set zone_hour [scan $zone_hour %d] + set zone [expr {60 * ($zone_min + 60 * $zone_hour)}] + if {$zone_sign eq "+"} { + set zone -$zone + } + incr clock $zone + } + } else { + set clock [clock scan $value] + } + + switch -- $property { + clock { + return $clock + } + + hour { + set value [clock format $clock -format %H] + } + + lmonth { + variable MONTHS_LONG + return [lindex $MONTHS_LONG \ + [scan [clock format $clock -format %m] %d]] + } + + lweekday { + variable WDAYS_LONG + return [lindex $WDAYS_LONG [clock format $clock -format %w]] + } + + mday { + set value [clock format $clock -format %d] + } + + min { + set value [clock format $clock -format %M] + } + + mon { + set value [clock format $clock -format %m] + } + + month { + variable MONTHS_SHORT + return [lindex $MONTHS_SHORT [ + scan [clock format $clock -format %m] %d]] + } + + proper { + set gmt [clock format $clock -format "%Y-%m-%d %H:%M:%S" -gmt true] + if {[set diff [expr {($clock-[clock scan $gmt]) / 60}]] < 0} { + set s - + set diff [expr {-($diff)}] + } else { + set s + + } + set zone [format %s%02d%02d $s [ + expr {$diff / 60}] [expr {$diff % 60}]] + + variable WDAYS_SHORT + set wday [lindex $WDAYS_SHORT [clock format $clock -format %w]] + variable MONTHS_SHORT + set mon [lindex $MONTHS_SHORT [ + scan [clock format $clock -format %m] %d]] + + return [ + clock format $clock -format "$wday, %d $mon %Y %H:%M:%S $zone"] + } + + rclock { + #TODO: these paths are not covered by tests + if {$value eq "-now"} { + return 0 + } else { + return [expr {[clock seconds] - $clock}] + } + } + + sec { + set value [clock format $clock -format %S] + } + + wday { + return [clock format $clock -format %w] + } + + weekday { + variable WDAYS_SHORT + return [lindex $WDAYS_SHORT [clock format $clock -format %w]] + } + + yday { + set value [clock format $clock -format %j] + } + + year { + set value [clock format $clock -format %Y] + } + + zone { + set value [string trim [string map [list \t { }] $value]] + if {[set x [string last { } $value]] < 0} { + return 0 + } + set value [string range $value [expr {$x + 1}] end] + switch -- [set s [string index $value 0]] { + + - - { + if {$s eq "+"} { + #TODO: This path is not covered by tests + set s {} + } + set value [string trim [string range $value 1 end]] + if {( + [string length $value] != 4) + || + [scan $value %2d%2d h m] != 2 + || + $h > 12 + || + $m > 59 + || + ($h == 12 && $m > 0) + } { + error "malformed timezone-specification: $value" + } + set value $s[expr {$h * 60 + $m}] + } + + default { + set value [string toupper $value] + set z1 [list UT GMT EST EDT CST CDT MST MDT PST PDT] + set z2 [list 0 0 -5 -4 -6 -5 -7 -6 -8 -7] + if {[set x [lsearch -exact $z1 $value]] < 0} { + error "unrecognized timezone-mnemonic: $value" + } + set value [expr {[lindex $z2 $x] * 60}] + } + } + } + + date2gmt + - + date2local + - + dst + - + sday + - + szone + - + tzone + - + default { + error "unknown property $property" + } + } + + if {[set value [string trimleft $value 0]] eq {}} { + #TODO: this path is not covered by tests + set value 0 + } + return $value +} + +# ::mime::uniqueID -- +# +# Used to generate a 'globally unique identifier' for the content-id. +# The id is built from the pid, the current time, the hostname, and +# a counter that is incremented each time a message is sent. +# +# Arguments: +# +# Results: +# Returns the a string that contains the globally unique identifier +# that should be used for the Content-ID of an e-mail message. + +proc ::mime::uniqueID {} { + variable mime + + return <[pid].[clock seconds].[incr mime(cid)]@[info hostname]> +} + +# ::mime::parselexeme -- +# +# Used to implement a lookahead parser. +# +# Arguments: +# token The MIME token to operate on. +# +# Results: +# Returns the next token found by the parser. + +proc ::mime::parselexeme {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(input) [string trimleft $state(input)] + + set state(buffer) {} + if {$state(input) eq {}} { + set state(buffer) end-of-input + return [set state(lastC) LX_END] + } + + set c [string index $state(input) 0] + set state(input) [string range $state(input) 1 end] + + if {$c eq "("} { + set noteP 0 + set quoteP 0 + + while 1 { + append state(buffer) $c + + #TODO: some of these paths are not covered by tests + switch -- $c/$quoteP { + (/0 { + incr noteP + } + + \\/0 { + set quoteP 1 + } + + )/0 { + if {[incr noteP -1] < 1} { + if {[info exists state(comment)]} { + append state(comment) { } + } + append state(comment) $state(buffer) + + return [parselexeme $token] + } + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during comment" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {$c eq "\""} { + set firstP 1 + set quoteP 0 + + while 1 { + append state(buffer) $c + + switch -- $c/$quoteP { + "\\/0" { + set quoteP 1 + } + + "\"/0" { + if {!$firstP} { + return [set state(lastC) LX_QSTRING] + } + set firstP 0 + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during quoted-string" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {$c eq {[}} { + set quoteP 0 + + while 1 { + append state(buffer) $c + + switch -- $c/$quoteP { + \\/0 { + set quoteP 1 + } + + ]/0 { + return [set state(lastC) LX_DLITERAL] + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during domain-literal" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {[set x [lsearch -exact $state(tokenL) $c]] >= 0} { + append state(buffer) $c + + return [set state(lastC) [lindex $state(lexemeL) $x]] + } + + while 1 { + append state(buffer) $c + + switch -- [set c [string index $state(input) 0]] { + {} - " " - "\t" - "\n" { + break + } + + default { + if {[lsearch -exact $state(tokenL) $c] >= 0} { + break + } + } + } + + set state(input) [string range $state(input) 1 end] + } + + return [set state(lastC) LX_ATOM] +} + +# ::mime::mapencoding -- +# +# mime::mapencodings maps tcl encodings onto the proper names for their +# MIME charset type. This is only done for encodings whose charset types +# were known. The remaining encodings return {} for now. +# +# Arguments: +# enc The tcl encoding to map. +# +# Results: +# Returns the MIME charset type for the specified tcl encoding, or {} +# if none is known. + +proc ::mime::mapencoding {enc} { + + variable encodings + + if {[info exists encodings($enc)]} { + return $encodings($enc) + } + return {} +} + +# ::mime::reversemapencoding -- +# +# mime::reversemapencodings maps MIME charset types onto tcl encoding names. +# Those that are unknown return {}. +# +# Arguments: +# mimeType The MIME charset to convert into a tcl encoding type. +# +# Results: +# Returns the tcl encoding name for the specified mime charset, or {} +# if none is known. + +proc ::mime::reversemapencoding {mimeType} { + + variable reversemap + + set lmimeType [string tolower $mimeType] + if {[info exists reversemap($lmimeType)]} { + return $reversemap($lmimeType) + } + return {} +} + +# ::mime::word_encode -- +# +# Word encodes strings as per RFC 2047. +# +# Arguments: +# charset The character set to encode the message to. +# method The encoding method (base64 or quoted-printable). +# string The string to encode. +# ?-charset_encoded 0 or 1 Whether the data is already encoded +# in the specified charset (default 1) +# ?-maxlength maxlength The maximum length of each encoded +# word to return (default 66) +# +# Results: +# Returns a word encoded string. + +proc ::mime::word_encode {charset method string {args}} { + + variable encodings + + if {![info exists encodings($charset)]} { + error "unknown charset '$charset'" + } + + if {$encodings($charset) eq {}} { + error "invalid charset '$charset'" + } + + if {$method ne "base64" && $method ne "quoted-printable"} { + error "unknown method '$method', must be base64 or quoted-printable" + } + + # default to encoded and a length that won't make the Subject header to long + array set options [list -charset_encoded 1 -maxlength 66] + array set options $args + + if {$options(-charset_encoded)} { + set unencoded_string [::encoding convertfrom $charset $string] + } else { + set unencoded_string $string + } + + set string_length [string length $unencoded_string] + + if {!$string_length} { + return {} + } + + set string_bytelength [string bytelength $unencoded_string] + + # the 7 is for =?, ?Q?, ?= delimiters of the encoded word + set maxlength [expr {$options(-maxlength) - [string length $encodings($charset)] - 7}] + switch -exact -- $method { + base64 { + if {$maxlength < 4} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + set count 0 + set maxlength [expr {($maxlength / 4) * 3}] + while {$count < $string_length} { + set length 0 + set enc_string {} + while {$length < $maxlength && $count < $string_length} { + set char [string range $unencoded_string $count $count] + set enc_char [::encoding convertto $charset $char] + if {$length + [string length $enc_char] > $maxlength} { + set length $maxlength + } else { + append enc_string $enc_char + incr count + incr length [string length $enc_char] + } + } + set encoded_word [string map [ + list \n {}] [base64 -mode encode -- $enc_string]] + append result "=?$encodings($charset)?B?$encoded_word?=\n " + } + # Trim off last "\n ", since the above code has the side-effect + # of adding an extra "\n " to the encoded string. + + set result [string range $result 0 end-2] + } + quoted-printable { + if {$maxlength < 1} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + set count 0 + while {$count < $string_length} { + set length 0 + set encoded_word {} + while {$length < $maxlength && $count < $string_length} { + set char [string range $unencoded_string $count $count] + set enc_char [::encoding convertto $charset $char] + set qp_enc_char [qp_encode $enc_char 1] + set qp_enc_char_length [string length $qp_enc_char] + if {$qp_enc_char_length > $maxlength} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + if { + $length + [string length $qp_enc_char] > $maxlength + } { + set length $maxlength + } else { + append encoded_word $qp_enc_char + incr count + incr length [string length $qp_enc_char] + } + } + append result "=?$encodings($charset)?Q?$encoded_word?=\n " + } + # Trim off last "\n ", since the above code has the side-effect + # of adding an extra "\n " to the encoded string. + + set result [string range $result 0 end-2] + } + {} { + # Go ahead + } + default { + error "Can't handle content encoding \"$method\"" + } + } + return $result +} + +# ::mime::word_decode -- +# +# Word decodes strings that have been word encoded as per RFC 2047. +# +# Arguments: +# encoded The word encoded string to decode. +# +# Results: +# Returns the string that has been decoded from the encoded message. + +proc ::mime::word_decode {encoded} { + + variable reversemap + + if {[regexp -- {=\?([^?]+)\?(.)\?([^?]*)\?=} $encoded \ + - charset method string] != 1 + } { + error "malformed word-encoded expression '$encoded'" + } + + set enc [reversemapencoding $charset] + if {$enc eq {}} { + error "unknown charset '$charset'" + } + + switch -exact -- $method { + b - + B { + set method base64 + } + q - + Q { + set method quoted-printable + } + default { + error "unknown method '$method', must be B or Q" + } + } + + switch -exact -- $method { + base64 { + set result [base64 -mode decode -- $string] + } + quoted-printable { + set result [qp_decode $string 1] + } + {} { + # Go ahead + } + default { + error "Can't handle content encoding \"$method\"" + } + } + + return [list $enc $method $result] +} + +# ::mime::field_decode -- +# +# Word decodes strings that have been word encoded as per RFC 2047 +# and converts the string from the original encoding/charset to UTF. +# +# Arguments: +# field The string to decode +# +# Results: +# Returns the decoded string in UTF. + +proc ::mime::field_decode {field} { + # ::mime::field_decode is broken. Here's a new version. + # This code is in the public domain. Don Libes + + # Step through a field for mime-encoded words, building a new + # version with unencoded equivalents. + + # Sorry about the grotesque regexp. Most of it is sensible. One + # notable fudge: the final $ is needed because of an apparent bug + # in the regexp engine where the preceding .* otherwise becomes + # non-greedy - perhaps because of the earlier ".*?", sigh. + + while {[regexp {(.*?)(=\?(?:[^?]+)\?(?:.)\?(?:[^?]*)\?=)(.*)$} $field \ + ignore prefix encoded field] + } { + # don't allow whitespace between encoded words per RFC 2047 + if {{} ne $prefix} { + if {![string is space $prefix]} { + append result $prefix + } + } + + set decoded [word_decode $encoded] + foreach {charset - string} $decoded break + + append result [::encoding convertfrom $charset $string] + } + append result $field + return $result +} + +## One-Shot Initialization + +::apply {{} { + variable encList + variable encAliasList + variable reversemap + + foreach {enc mimeType} $encList { + if {$mimeType eq {}} continue + set reversemap([string tolower $mimeType]) $enc + } + + foreach {enc mimeType} $encAliasList { + set reversemap([string tolower $mimeType]) $enc + } + + # Drop the helper variables + unset encList encAliasList + +} ::mime} + + +variable ::mime::internal 0 diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.1.tm new file mode 100644 index 00000000..b4b0d61d --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/mime-1.7.1.tm @@ -0,0 +1,3934 @@ +# mime.tcl - MIME body parts +# +# (c) 1999-2000 Marshall T. Rose +# (c) 2000 Brent Welch +# (c) 2000 Sandeep Tamhankar +# (c) 2000 Dan Kuchler +# (c) 2000-2001 Eric Melski +# (c) 2001 Jeff Hobbs +# (c) 2001-2008 Andreas Kupries +# (c) 2002-2003 David Welton +# (c) 2003-2008 Pat Thoyts +# (c) 2005 Benjamin Riefenstahl +# (c) 2013-2021 Poor Yorick +# +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# Influenced by Borenstein's/Rose's safe-tcl (circa 1993) and Darren New's +# unpublished package of 1999. +# + +# new string features and inline scan are used, requiring 8.3. +package require Tcl 8.5 + +package provide mime 1.7.1 +package require tcl::chan::memchan + + +if {[catch {package require Trf 2.0}]} { + + # Fall-back to tcl-based procedures of base64 and quoted-printable + # encoders + ## + # Warning! + ## + # These are a fragile emulations of the more general calling + # sequence that appears to work with this code here. + ## + # The `__ignored__` arguments are expected to be `--` options on + # the caller's side. (See the uses in `copymessageaux`, + # `buildmessageaux`, `parsepart`, and `getbody`). + + package require base64 2.0 + set ::major [lindex [split [package require md5] .] 0] + + # Create these commands in the mime namespace so that they + # won't collide with things at the global namespace level + + namespace eval ::mime { + proc base64 {-mode what __ignored__ chunk} { + return [base64::$what $chunk] + } + proc quoted-printable {-mode what __ignored__ chunk} { + return [mime::qp_$what $chunk] + } + + if {$::major < 2} { + # md5 v1, result is hex string ready for use. + proc md5 {__ignored__ string} { + return [md5::md5 $string] + } + } else { + # md5 v2, need option to get hex string + proc md5 {__ignored__ string} { + return [md5::md5 -hex $string] + } + } + } + + unset ::major +} + +# +# state variables: +# +# canonicalP: input is in its canonical form +# content: type/subtype +# params: dictionary (keys are lower-case) +# encoding: transfer encoding +# version: MIME-version +# header: dictionary (keys are lower-case) +# lowerL: list of header keys, lower-case +# mixedL: list of header keys, mixed-case +# value: either "file", "parts", or "string" +# +# file: input file +# fd: cached file-descriptor, typically for root +# root: token for top-level part, for (distant) subordinates +# offset: number of octets from beginning of file/string +# count: length in octets of (encoded) content +# +# parts: list of bodies (tokens) +# +# string: input string +# +# cid: last child-id assigned +# + + +namespace eval ::mime { + variable mime + array set mime {uid 0 cid 0} + + # RFC 822 lexemes + variable addrtokenL + lappend addrtokenL \; , < > : . ( ) @ \" \[ ] \\ + variable addrlexemeL { + LX_SEMICOLON LX_COMMA + LX_LBRACKET LX_RBRACKET + LX_COLON LX_DOT + LX_LPAREN LX_RPAREN + LX_ATSIGN LX_QUOTE + LX_LSQUARE LX_RSQUARE + LX_QUOTE + } + + # RFC 2045 lexemes + variable typetokenL + lappend typetokenL \; , < > : ? ( ) @ \" \[ \] = / \\ + variable typelexemeL { + LX_SEMICOLON LX_COMMA + LX_LBRACKET LX_RBRACKET + LX_COLON LX_QUESTION + LX_LPAREN LX_RPAREN + LX_ATSIGN LX_QUOTE + LX_LSQUARE LX_RSQUARE + LX_EQUALS LX_SOLIDUS + LX_QUOTE + } + + variable encList { + ascii US-ASCII + big5 Big5 + cp1250 Windows-1250 + cp1251 Windows-1251 + cp1252 Windows-1252 + cp1253 Windows-1253 + cp1254 Windows-1254 + cp1255 Windows-1255 + cp1256 Windows-1256 + cp1257 Windows-1257 + cp1258 Windows-1258 + cp437 IBM437 + cp737 {} + cp775 IBM775 + cp850 IBM850 + cp852 IBM852 + cp855 IBM855 + cp857 IBM857 + cp860 IBM860 + cp861 IBM861 + cp862 IBM862 + cp863 IBM863 + cp864 IBM864 + cp865 IBM865 + cp866 IBM866 + cp869 IBM869 + cp874 {} + cp932 {} + cp936 GBK + cp949 {} + cp950 {} + dingbats {} + ebcdic {} + euc-cn EUC-CN + euc-jp EUC-JP + euc-kr EUC-KR + gb12345 GB12345 + gb1988 GB1988 + gb2312 GB2312 + iso2022 ISO-2022 + iso2022-jp ISO-2022-JP + iso2022-kr ISO-2022-KR + iso8859-1 ISO-8859-1 + iso8859-2 ISO-8859-2 + iso8859-3 ISO-8859-3 + iso8859-4 ISO-8859-4 + iso8859-5 ISO-8859-5 + iso8859-6 ISO-8859-6 + iso8859-7 ISO-8859-7 + iso8859-8 ISO-8859-8 + iso8859-9 ISO-8859-9 + iso8859-10 ISO-8859-10 + iso8859-13 ISO-8859-13 + iso8859-14 ISO-8859-14 + iso8859-15 ISO-8859-15 + iso8859-16 ISO-8859-16 + jis0201 JIS_X0201 + jis0208 JIS_C6226-1983 + jis0212 JIS_X0212-1990 + koi8-r KOI8-R + koi8-u KOI8-U + ksc5601 KS_C_5601-1987 + macCentEuro {} + macCroatian {} + macCyrillic {} + macDingbats {} + macGreek {} + macIceland {} + macJapan {} + macRoman {} + macRomania {} + macThai {} + macTurkish {} + macUkraine {} + shiftjis Shift_JIS + symbol {} + tis-620 TIS-620 + unicode {} + utf-8 UTF-8 + } + + variable encodings + array set encodings $encList + variable reversemap + # Initialized at the bottom of the file + + variable encAliasList { + ascii ANSI_X3.4-1968 + ascii iso-ir-6 + ascii ANSI_X3.4-1986 + ascii ISO_646.irv:1991 + ascii ASCII + ascii ISO646-US + ascii us + ascii IBM367 + ascii cp367 + cp437 cp437 + cp437 437 + cp775 cp775 + cp850 cp850 + cp850 850 + cp852 cp852 + cp852 852 + cp855 cp855 + cp855 855 + cp857 cp857 + cp857 857 + cp860 cp860 + cp860 860 + cp861 cp861 + cp861 861 + cp861 cp-is + cp862 cp862 + cp862 862 + cp863 cp863 + cp863 863 + cp864 cp864 + cp865 cp865 + cp865 865 + cp866 cp866 + cp866 866 + cp869 cp869 + cp869 869 + cp869 cp-gr + cp936 CP936 + cp936 MS936 + cp936 Windows-936 + iso8859-1 ISO_8859-1:1987 + iso8859-1 iso-ir-100 + iso8859-1 ISO_8859-1 + iso8859-1 latin1 + iso8859-1 l1 + iso8859-1 IBM819 + iso8859-1 CP819 + iso8859-2 ISO_8859-2:1987 + iso8859-2 iso-ir-101 + iso8859-2 ISO_8859-2 + iso8859-2 latin2 + iso8859-2 l2 + iso8859-3 ISO_8859-3:1988 + iso8859-3 iso-ir-109 + iso8859-3 ISO_8859-3 + iso8859-3 latin3 + iso8859-3 l3 + iso8859-4 ISO_8859-4:1988 + iso8859-4 iso-ir-110 + iso8859-4 ISO_8859-4 + iso8859-4 latin4 + iso8859-4 l4 + iso8859-5 ISO_8859-5:1988 + iso8859-5 iso-ir-144 + iso8859-5 ISO_8859-5 + iso8859-5 cyrillic + iso8859-6 ISO_8859-6:1987 + iso8859-6 iso-ir-127 + iso8859-6 ISO_8859-6 + iso8859-6 ECMA-114 + iso8859-6 ASMO-708 + iso8859-6 arabic + iso8859-7 ISO_8859-7:1987 + iso8859-7 iso-ir-126 + iso8859-7 ISO_8859-7 + iso8859-7 ELOT_928 + iso8859-7 ECMA-118 + iso8859-7 greek + iso8859-7 greek8 + iso8859-8 ISO_8859-8:1988 + iso8859-8 iso-ir-138 + iso8859-8 ISO_8859-8 + iso8859-8 hebrew + iso8859-9 ISO_8859-9:1989 + iso8859-9 iso-ir-148 + iso8859-9 ISO_8859-9 + iso8859-9 latin5 + iso8859-9 l5 + iso8859-10 iso-ir-157 + iso8859-10 l6 + iso8859-10 ISO_8859-10:1992 + iso8859-10 latin6 + iso8859-14 iso-ir-199 + iso8859-14 ISO_8859-14:1998 + iso8859-14 ISO_8859-14 + iso8859-14 latin8 + iso8859-14 iso-celtic + iso8859-14 l8 + iso8859-15 ISO_8859-15 + iso8859-15 Latin-9 + iso8859-16 iso-ir-226 + iso8859-16 ISO_8859-16:2001 + iso8859-16 ISO_8859-16 + iso8859-16 latin10 + iso8859-16 l10 + jis0201 X0201 + jis0208 iso-ir-87 + jis0208 x0208 + jis0208 JIS_X0208-1983 + jis0212 x0212 + jis0212 iso-ir-159 + ksc5601 iso-ir-149 + ksc5601 KS_C_5601-1989 + ksc5601 KSC5601 + ksc5601 korean + shiftjis MS_Kanji + utf-8 UTF8 + } + + namespace export {*}{ + copymessage finalize getbody getheader getproperty initialize + mapencoding parseaddress parsedatetime reversemapencoding setheader + uniqueID + } +} + +# ::mime::initialize -- +# +# Creates a MIME part, and returnes the MIME token for that part. +# +# Arguments: +# args Args can be any one of the following: +# ?-canonical type/subtype +# ?-param {key value}?... +# ?-encoding value? +# ?-header {key value}?... ? +# (-file name | -string value | -parts {token1 ... tokenN}) +# +# If the -canonical option is present, then the body is in +# canonical (raw) form and is found by consulting either the -file, +# -string, or -parts option. +# +# In addition, both the -param and -header options may occur zero +# or more times to specify "Content-Type" parameters (e.g., +# "charset") and header keyword/values (e.g., +# "Content-Disposition"), respectively. +# +# Also, -encoding, if present, specifies the +# "Content-Transfer-Encoding" when copying the body. +# +# If the -canonical option is not present, then the MIME part +# contained in either the -file or the -string option is parsed, +# dynamically generating subordinates as appropriate. +# +# Results: +# An initialized mime token. + +proc ::mime::initialize args { + global errorCode errorInfo + + variable mime + + set token [namespace current]::[incr mime(uid)] + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[catch [list mime::initializeaux $token {*}$args] result eopts]} { + catch {mime::finalize $token -subordinates dynamic} + return -options $eopts $result + } + return $token +} + +# ::mime::initializeaux -- +# +# Configures the MIME token created in mime::initialize based on +# the arguments that mime::initialize supports. +# +# Arguments: +# token The MIME token to configure. +# args Args can be any one of the following: +# ?-canonical type/subtype +# ?-param {key value}?... +# ?-encoding value? +# ?-header {key value}?... ? +# (-file name | -string value | -parts {token1 ... tokenN}) +# +# Results: +# Either configures the mime token, or throws an error. + +proc ::mime::initializeaux {token args} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set params [set state(params) {}] + set state(encoding) {} + set state(version) 1.0 + + set state(header) {} + set state(lowerL) {} + set state(mixedL) {} + + set state(cid) 0 + + set userheader 0 + + set argc [llength $args] + for {set argx 0} {$argx < $argc} {incr argx} { + set option [lindex $args $argx] + if {[incr argx] >= $argc} { + error "missing argument to $option" + } + set value [lindex $args $argx] + + switch -- $option { + -canonical { + set state(content) [string tolower $value] + } + + -param { + if {[llength $value] != 2} { + error "-param expects a key and a value, not $value" + } + set lower [string tolower [set mixed [lindex $value 0]]] + if {[info exists params($lower)]} { + error "the $mixed parameter may be specified at most once" + } + + set params($lower) [lindex $value 1] + set state(params) [array get params] + } + + -encoding { + set value [string tolower $value[set value {}]] + + switch -- $value { + 7bit - 8bit - binary - quoted-printable - base64 { + } + + default { + error "unknown value for -encoding $state(encoding)" + } + } + set state(encoding) [string tolower $value] + } + + -header { + if {[llength $value] != 2} { + error "-header expects a key and a value, not $value" + } + set lower [string tolower [set mixed [lindex $value 0]]] + if {$lower eq {content-type}} { + error "use -canonical instead of -header $value" + } + if {$lower eq {content-transfer-encoding}} { + error "use -encoding instead of -header $value" + } + if {$lower in {content-md5 mime-version}} { + error {don't go there...} + } + if {$lower ni $state(lowerL)} { + lappend state(lowerL) $lower + lappend state(mixedL) $mixed + } + + set userheader 1 + + array set header $state(header) + lappend header($lower) [lindex $value 1] + set state(header) [array get header] + } + + -file { + set state(file) $value + } + + -parts { + set state(parts) $value + } + + -string { + set state(string) $value + + set state(lines) [split $value \n] + set state(lines.count) [llength $state(lines)] + set state(lines.current) 0 + } + + -root { + # the following are internal options + + set state(root) $value + } + + -offset { + set state(offset) $value + } + + -count { + set state(count) $value + } + + -lineslist { + set state(lines) $value + set state(lines.count) [llength $state(lines)] + set state(lines.current) 0 + #state(string) is needed, but will be built when required + set state(string) {} + } + + default { + error "unknown option $option" + } + } + } + + #We only want one of -file, -parts or -string: + set valueN 0 + foreach value {file parts string} { + if {[info exists state($value)]} { + set state(value) $value + incr valueN + } + } + if {$valueN != 1 && ![info exists state(lines)]} { + error {specify exactly one of -file, -parts, or -string} + } + + if {[set state(canonicalP) [info exists state(content)]]} { + switch -- $state(value) { + file { + set state(offset) 0 + } + + parts { + switch -glob -- $state(content) { + text/* + - + image/* + - + audio/* + - + video/* { + error "-canonical $state(content) and -parts do not mix" + } + + default { + if {$state(encoding) ne {}} { + error {-encoding and -parts do not mix} + } + } + } + } + default {# Go ahead} + } + + if {[lsearch -exact $state(lowerL) content-id] < 0} { + lappend state(lowerL) content-id + lappend state(mixedL) Content-ID + + array set header $state(header) + lappend header(content-id) [uniqueID] + set state(header) [array get header] + } + + set state(version) 1.0 + return + } + + if {$state(params) ne {}} { + error {-param requires -canonical} + } + if {$state(encoding) ne {}} { + error {-encoding requires -canonical} + } + if {$userheader} { + error {-header requires -canonical} + } + if {[info exists state(parts)]} { + error {-parts requires -canonical} + } + + if {[set fileP [info exists state(file)]]} { + if {[set openP [info exists state(root)]]} { + # FRINK: nocheck + variable $state(root) + upvar 0 $state(root) root + + set state(fd) $root(fd) + } else { + set state(root) $token + set state(fd) [open $state(file) RDONLY] + set state(offset) 0 + seek $state(fd) 0 end + set state(count) [tell $state(fd)] + + fconfigure $state(fd) -translation binary + } + } + + set code [catch {mime::parsepart $token} result] + set ecode $errorCode + set einfo $errorInfo + + if {$fileP} { + if {!$openP} { + unset state(root) + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parsepart -- +# +# Parses the MIME headers and attempts to break up the message +# into its various parts, creating a MIME token for each part. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Throws an error if it has problems parsing the MIME token, +# otherwise it just sets up the appropriate variables. + +proc ::mime::parsepart {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[set fileP [info exists state(file)]]} { + seek $state(fd) [set pos $state(offset)] start + set last [expr {$state(offset) + $state(count) - 1}] + } else { + set string $state(string) + } + + set vline {} + while 1 { + set blankP 0 + if {$fileP} { + if {($pos > $last) || ([set x [gets $state(fd) line]] <= 0)} { + set blankP 1 + } else { + incr pos [expr {$x + 1}] + } + } else { + if {$state(lines.current) >= $state(lines.count)} { + set blankP 1 + set line {} + } else { + set line [lindex $state(lines) $state(lines.current)] + incr state(lines.current) + set x [string length $line] + if {$x == 0} {set blankP 1} + } + } + + if {!$blankP && [string match *\r $line]} { + set line [string range $line 0 $x-2] + if {$x == 1} { + set blankP 1 + } + } + + if {!$blankP && ( + [string first { } $line] == 0 + || + [string first \t $line] == 0 + )} { + append vline \n $line + continue + } + + if {$vline eq {}} { + if {$blankP} { + break + } + + set vline $line + continue + } + + if { + [set x [string first : $vline]] <= 0 + || + [set mixed [string trimright [ + string range $vline 0 [expr {$x - 1}]]]] eq {} + } { + error "improper line in header: $vline" + } + set value [string trim [string range $vline [expr {$x + 1}] end]] + switch -- [set lower [string tolower $mixed]] { + content-type { + if {[info exists state(content)]} { + error "multiple Content-Type fields starting with $vline" + } + + if {![catch {set x [parsetype $token $value]}]} { + set state(content) [lindex $x 0] + set state(params) [lindex $x 1] + } + } + + content-md5 { + } + + content-transfer-encoding { + if { + $state(encoding) ne {} + && + $state(encoding) ne [string tolower $value] + } { + error "multiple Content-Transfer-Encoding fields starting with $vline" + } + + set state(encoding) [string tolower $value] + } + + mime-version { + set state(version) $value + } + + default { + if {[lsearch -exact $state(lowerL) $lower] < 0} { + lappend state(lowerL) $lower + lappend state(mixedL) $mixed + } + + array set header $state(header) + lappend header($lower) $value + set state(header) [array get header] + } + } + + if {$blankP} { + break + } + set vline $line + } + + if {![info exists state(content)]} { + set state(content) text/plain + set state(params) [list charset us-ascii] + } + + if {![string match multipart/* $state(content)]} { + if {$fileP} { + set x [tell $state(fd)] + incr state(count) [expr {$state(offset) - $x}] + set state(offset) $x + } else { + # rebuild string, this is cheap and needed by other functions + set state(string) [join [ + lrange $state(lines) $state(lines.current) end] \n] + } + + if {[string match message/* $state(content)]} { + # FRINK: nocheck + variable [set child $token-[incr state(cid)]] + + set state(value) parts + set state(parts) $child + if {$fileP} { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $state(offset) -count $state(count) + } else { + if {[info exists state(encoding)]} { + set strng [join [ + lrange $state(lines) $state(lines.current) end] \n] + switch -- $state(encoding) { + base64 - + quoted-printable { + set strng [$state(encoding) -mode decode -- $strng] + } + default {} + } + mime::initializeaux $child -string $strng + } else { + mime::initializeaux $child -lineslist [ + lrange $state(lines) $state(lines.current) end] + } + } + } + + return + } + + set state(value) parts + + set boundary {} + foreach {k v} $state(params) { + if {$k eq {boundary}} { + set boundary $v + break + } + } + if {$boundary eq {}} { + error "boundary parameter is missing in $state(content)" + } + if {[string trim $boundary] eq {}} { + error "boundary parameter is empty in $state(content)" + } + + if {$fileP} { + set pos [tell $state(fd)] + # This variable is like 'start', for the reasons laid out + # below, in the other branch of this conditional. + set initialpos $pos + } else { + # This variable is like 'start', a list of lines in the + # part. This record is made even before we find a starting + # boundary and used if we run into the terminating boundary + # before a starting boundary was found. In that case the lines + # before the terminator as recorded by tracelines are seen as + # the part, or at least we attempt to parse them as a + # part. See the forceoctet and nochild flags later. We cannot + # use 'start' as that records lines only after the starting + # boundary was found. + set tracelines [list] + } + + set inP 0 + set moreP 1 + set forceoctet 0 + while {$moreP} { + if {$fileP} { + if {$pos > $last} { + # We have run over the end of the part per the outer + # information without finding a terminating boundary. + # We now fake the boundary and force the parser to + # give any new part coming of this a mime-type of + # application/octet-stream regardless of header + # information. + set line "--$boundary--" + set x [string length $line] + set forceoctet 1 + } else { + if {[set x [gets $state(fd) line]] < 0} { + error "end-of-file encountered while parsing $state(content)" + } + } + incr pos [expr {$x + 1}] + } else { + if {$state(lines.current) >= $state(lines.count)} { + error "end-of-string encountered while parsing $state(content)" + } else { + set line [lindex $state(lines) $state(lines.current)] + incr state(lines.current) + set x [string length $line] + } + set x [string length $line] + } + if {[string last \r $line] == $x - 1} { + set line [string range $line 0 [expr {$x - 2}]] + set crlf 2 + } else { + set crlf 1 + } + + if {[string first --$boundary $line] != 0} { + if {$inP && !$fileP} { + lappend start $line + } + continue + } else { + lappend tracelines $line + } + + if {!$inP} { + # Haven't seen the starting boundary yet. Check if the + # current line contains this starting boundary. + + if {$line eq "--$boundary"} { + # Yes. Switch parser state to now search for the + # terminating boundary of the part and record where + # the part begins (or initialize the recorder for the + # lines in the part). + set inP 1 + if {$fileP} { + set start $pos + } else { + set start [list] + } + continue + } elseif {$line eq "--$boundary--"} { + # We just saw a terminating boundary before we ever + # saw the starting boundary of a part. This forces us + # to stop parsing, we do this by forcing the parser + # into an accepting state. We will try to create a + # child part based on faked start position or recorded + # lines, or, if that fails, let the current part have + # no children. + + # As an example note the test case mime-3.7 and the + # referenced file "badmail1.txt". + + set inP 1 + if {$fileP} { + set start $initialpos + } else { + set start $tracelines + } + set forceoctet 1 + # Fall through. This brings to the creation of the new + # part instead of searching further and possible + # running over the end. + } else { + continue + } + } + + # Looking for the end of the current part. We accept both a + # terminating boundary and the starting boundary of the next + # part as the end of the current part. + + if {[set moreP [string compare $line --$boundary--]] + && $line ne "--$boundary"} { + + # The current part has not ended, so we record the line + # if we are inside a part and doing string parsing. + if {$inP && !$fileP} { + lappend start $line + } + continue + } + + # The current part has ended. We now determine the exact + # boundaries, create a mime part object for it and recursively + # parse it deeper as part of that action. + + # FRINK: nocheck + variable [set child $token-[incr state(cid)]] + + lappend state(parts) $child + + set nochild 0 + if {$fileP} { + if {[set count [expr {$pos - ($start + $x + $crlf + 1)}]] < 0} { + set count 0 + } + if {$forceoctet} { + set ::errorInfo {} + if {[catch { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $start -count $count + }]} { + set nochild 1 + set state(parts) [lrange $state(parts) 0 end-1] + } } else { + mime::initializeaux $child \ + -file $state(file) -root $state(root) \ + -offset $start -count $count + } + seek $state(fd) [set start $pos] start + } else { + if {$forceoctet} { + if {[catch { + mime::initializeaux $child -lineslist $start + }]} { + set nochild 1 + set state(parts) [lrange $state(parts) 0 end-1] + } + } else { + mime::initializeaux $child -lineslist $start + } + set start {} + } + if {$forceoctet && !$nochild} { + variable $child + upvar 0 $child childstate + set childstate(content) application/octet-stream + } + set forceoctet 0 + } +} + +# ::mime::parsetype -- +# +# Parses the string passed in and identifies the content-type and +# params strings. +# +# Arguments: +# token The MIME token to parse. +# string The content-type string that should be parsed. +# +# Results: +# Returns the content and params for the string as a two element +# tcl list. + +proc ::mime::parsetype {token string} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + variable typetokenL + variable typelexemeL + + set state(input) $string + set state(buffer) {} + set state(lastC) LX_END + set state(comment) {} + set state(tokenL) $typetokenL + set state(lexemeL) $typelexemeL + + set code [catch {mime::parsetypeaux $token $string} result] + set ecode $errorCode + set einfo $errorInfo + + unset {*}{ + state(input) + state(buffer) + state(lastC) + state(comment) + state(tokenL) + state(lexemeL) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parsetypeaux -- +# +# A helper function for mime::parsetype. Parses the specified +# string looking for the content type and params. +# +# Arguments: +# token The MIME token to parse. +# string The content-type string that should be parsed. +# +# Results: +# Returns the content and params for the string as a two element +# tcl list. + +proc ::mime::parsetypeaux {token string} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[parselexeme $token] ne {LX_ATOM}} { + error [format {expecting type (found %s)} $state(buffer)] + } + set type [string tolower $state(buffer)] + + switch -- [parselexeme $token] { + LX_SOLIDUS { + } + + LX_END { + if {$type ne {message}} { + error "expecting type/subtype (found $type)" + } + + return [list message/rfc822 {}] + } + + default { + error [format "expecting \"/\" (found %s)" $state(buffer)] + } + } + + if {[parselexeme $token] ne {LX_ATOM}} { + error [format "expecting subtype (found %s)" $state(buffer)] + } + append type [string tolower /$state(buffer)] + + array set params {} + while 1 { + switch -- [parselexeme $token] { + LX_END { + return [list $type [array get params]] + } + + LX_SEMICOLON { + } + + default { + error [format "expecting \";\" (found %s)" $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_END { + return [list $type [array get params]] + } + + LX_ATOM { + } + + default { + error [format "expecting attribute (found %s)" $state(buffer)] + } + } + + set attribute [string tolower $state(buffer)] + + if {[parselexeme $token] ne {LX_EQUALS}} { + error [format {expecting "=" (found %s)} $state(buffer)] + } + + switch -- [parselexeme $token] { + LX_ATOM { + } + + LX_QSTRING { + set state(buffer) [ + string range $state(buffer) 1 [ + expr {[string length $state(buffer)] - 2}]] + } + + default { + error [format {expecting value (found %s)} $state(buffer)] + } + } + set params($attribute) $state(buffer) + } +} + +# ::mime::finalize -- +# +# mime::finalize destroys a MIME part. +# +# If the -subordinates option is present, it specifies which +# subordinates should also be destroyed. The default value is +# "dynamic". +# +# Arguments: +# token The MIME token to parse. +# args Args can be optionally be of the following form: +# ?-subordinates "all" | "dynamic" | "none"? +# +# Results: +# Returns an empty string. + +proc ::mime::finalize {token args} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set options [list -subordinates dynamic] + array set options $args + + switch -- $options(-subordinates) { + all { + #TODO: this code path is untested + if {$state(value) eq {parts}} { + foreach part $state(parts) { + eval [linsert $args 0 mime::finalize $part] + } + } + } + + dynamic { + for {set cid $state(cid)} {$cid > 0} {incr cid -1} { + eval [linsert $args 0 mime::finalize $token-$cid] + } + } + + none { + } + + default { + error "unknown value for -subordinates $options(-subordinates)" + } + } + + foreach name [array names state] { + unset state($name) + } + # FRINK: nocheck + unset $token +} + +# ::mime::getproperty -- +# +# mime::getproperty returns the properties of a MIME part. +# +# The properties are: +# +# property value +# ======== ===== +# content the type/subtype describing the content +# encoding the "Content-Transfer-Encoding" +# params a list of "Content-Type" parameters +# parts a list of tokens for the part's subordinates +# size the approximate size of the content (unencoded) +# +# The "parts" property is present only if the MIME part has +# subordinates. +# +# If mime::getproperty is invoked with the name of a specific +# property, then the corresponding value is returned; instead, if +# -names is specified, a list of all properties is returned; +# otherwise, a dictionary of properties is returned. +# +# Arguments: +# token The MIME token to parse. +# property One of 'content', 'encoding', 'params', 'parts', and +# 'size'. Defaults to returning a dictionary of +# properties. +# +# Results: +# Returns the properties of a MIME part + +proc ::mime::getproperty {token {property {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $property { + {} { + array set properties [list content $state(content) \ + encoding $state(encoding) \ + params $state(params) \ + size [getsize $token]] + if {[info exists state(parts)]} { + set properties(parts) $state(parts) + } + + return [array get properties] + } + + -names { + set names [list content encoding params] + if {[info exists state(parts)]} { + lappend names parts + } + + return $names + } + + content + - + encoding + - + params { + return $state($property) + } + + parts { + if {![info exists state(parts)]} { + error {MIME part is a leaf} + } + + return $state(parts) + } + + size { + return [getsize $token] + } + + default { + error "unknown property $property" + } + } +} + +# ::mime::getsize -- +# +# Determine the size (in bytes) of a MIME part/token +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Returns the size in bytes of the MIME token. + +proc ::mime::getsize {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $state(value)/$state(canonicalP) { + file/0 { + set size $state(count) + } + + file/1 { + return [file size $state(file)] + } + + parts/0 + - + parts/1 { + set size 0 + foreach part $state(parts) { + incr size [getsize $part] + } + + return $size + } + + string/0 { + set size [string length $state(string)] + } + + string/1 { + return [string length $state(string)] + } + default { + error "Unknown combination \"$state(value)/$state(canonicalP)\"" + } + } + + if {$state(encoding) eq {base64}} { + set size [expr {($size * 3 + 2) / 4}] + } + + return $size +} + + +proc ::mime::getContentType token { + variable $token + upvar 0 $token state + set res $state(content) + + set boundary {} + foreach {k v} $state(params) { + if {$k eq {boundary}} { + set boundary $v + } + append res ";\n $k=\"$v\"" + } + + # Save boundary separate from the params + set state(boundary) $boundary + + if {([string match multipart/* $state(content)]) \ + && ($boundary eq {})} { + # we're doing everything in one pass... + set key [clock seconds]$token[info hostname][array get state] + set seqno 8 + while {[incr seqno -1] >= 0} { + set key [md5 -- $key] + } + set boundary "----- =_[string trim [base64 -mode encode -- $key]]" + + set state(boundary) $boundary + + append res ";\n boundary=\"$boundary\"" + } + return $res +} + +# ::mime::getheader -- +# +# mime::getheader returns the header of a MIME part. +# +# A header consists of zero or more key/value pairs. Each value is a +# list containing one or more strings. +# +# If mime::getheader is invoked with the name of a specific key, then +# a list containing the corresponding value(s) is returned; instead, +# if -names is specified, a list of all keys is returned; otherwise, a +# dictionary is returned. Note that when a +# key is specified (e.g., "Subject"), the list returned usually +# contains exactly one string; however, some keys (e.g., "Received") +# often occur more than once in the header, accordingly the list +# returned usually contains more than one string. +# +# Arguments: +# token The MIME token to parse. +# key Either a key or '-names'. If it is '-names' a list +# of all keys is returned. +# +# Results: +# Returns the header of a MIME part. + +proc ::mime::getheader {token {key {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set header $state(header) + switch -- $key { + {} { + set result {} + lappend result MIME-Version $state(version) + foreach lower $state(lowerL) mixed $state(mixedL) { + foreach value $header($lower) { + lappend result $mixed $value + } + } + set tencoding [getTransferEncoding $token] + if {$tencoding ne {}} { + lappend result Content-Transfer-Encoding $tencoding + } + lappend result Content-Type [getContentType $token] + return $result + } + + -names { + return $state(mixedL) + } + + default { + set lower [string tolower $key] + + switch $lower { + content-transfer-encoding { + return [getTransferEncoding $token] + } + content-type { + return [list [getContentType $token]] + } + mime-version { + return [list $state(version)] + } + default { + if {![info exists header($lower)]} { + error "key $key not in header" + } + return $header($lower) + } + } + } + } +} + + +proc ::mime::getTransferEncoding token { + variable $token + upvar 0 $token state + set res {} + if {[set encoding $state(encoding)] eq {}} { + set encoding [encoding $token] + } + if {$encoding ne {}} { + set res $encoding + } + switch -- $encoding { + base64 + - + quoted-printable { + set converter $encoding + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088], also [#539952] + # Go ahead + } + default { + error "Can't handle content encoding \"$encoding\"" + } + } + return $res +} + +# ::mime::setheader -- +# +# mime::setheader writes, appends to, or deletes the value associated +# with a key in the header. +# +# The value for -mode is one of: +# +# write: the key/value is either created or overwritten (the +# default); +# +# append: a new value is appended for the key (creating it as +# necessary); or, +# +# delete: all values associated with the key are removed (the +# "value" parameter is ignored). +# +# Regardless, mime::setheader returns the previous value associated +# with the key. +# +# Arguments: +# token The MIME token to parse. +# key The name of the key whose value should be set. +# value The value for the header key to be set to. +# args An optional argument of the form: +# ?-mode "write" | "append" | "delete"? +# +# Results: +# Returns previous value associated with the specified key. + +proc ::mime::setheader {token key value args} { + # FRINK: nocheck + variable internal + variable $token + upvar 0 $token state + + array set options [list -mode write] + array set options $args + + set lower [string tolower $key] + array set header $state(header) + if {[set x [lsearch -exact $state(lowerL) $lower]] < 0} { + #TODO: this code path is not tested + if {$options(-mode) eq {delete}} { + error "key $key not in header" + } + + lappend state(lowerL) $lower + lappend state(mixedL) $key + + set result {} + } else { + set result $header($lower) + } + switch -- $options(-mode) { + append - write { + if {!$internal} { + switch -- $lower { + content-md5 + - + content-type + - + content-transfer-encoding + - + mime-version { + set values [getheader $token $lower] + if {$value ni $values} { + error "key $key may not be set" + } + } + default {# Skip key} + } + } + switch -- $options(-mode) { + append { + lappend header($lower) $value + } + write { + set header($lower) [list $value] + } + } + } + delete { + unset header($lower) + set state(lowerL) [lreplace $state(lowerL) $x $x] + set state(mixedL) [lreplace $state(mixedL) $x $x] + } + + default { + error "unknown value for -mode $options(-mode)" + } + } + + set state(header) [array get header] + return $result +} + +# ::mime::getbody -- +# +# mime::getbody returns the body of a leaf MIME part in canonical form. +# +# If the -command option is present, then it is repeatedly invoked +# with a fragment of the body as this: +# +# uplevel #0 $callback [list "data" $fragment] +# +# (The -blocksize option, if present, specifies the maximum size of +# each fragment passed to the callback.) +# When the end of the body is reached, the callback is invoked as: +# +# uplevel #0 $callback "end" +# +# Alternatively, if an error occurs, the callback is invoked as: +# +# uplevel #0 $callback [list "error" reason] +# +# Regardless, the return value of the final invocation of the callback +# is propagated upwards by mime::getbody. +# +# If the -command option is absent, then the return value of +# mime::getbody is a string containing the MIME part's entire body. +# +# Arguments: +# token The MIME token to parse. +# args Optional arguments of the form: +# ?-decode? ?-command callback ?-blocksize octets? ? +# +# Results: +# Returns a string containing the MIME part's entire body, or +# if '-command' is specified, the return value of the command +# is returned. + +proc ::mime::getbody {token args} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set decode 0 + if {[set pos [lsearch -exact $args -decode]] >= 0} { + set decode 1 + set args [lreplace $args $pos $pos] + } + + array set options [list -command [ + list mime::getbodyaux $token] -blocksize 4096] + array set options $args + if {$options(-blocksize) < 1} { + error "-blocksize expects a positive integer, not $options(-blocksize)" + } + + set code 0 + set ecode {} + set einfo {} + + switch -- $state(value)/$state(canonicalP) { + file/0 { + set fd [open $state(file) RDONLY] + + set code [catch { + fconfigure $fd -translation binary + seek $fd [set pos $state(offset)] start + set last [expr {$state(offset) + $state(count) - 1}] + + set fragment {} + while {$pos <= $last} { + if {[set cc [ + expr {($last - $pos) + 1}]] > $options(-blocksize)} { + set cc $options(-blocksize) + } + incr pos [set len [ + string length [set chunk [read $fd $cc]]]] + switch -exact -- $state(encoding) { + base64 + - + quoted-printable { + if {([set x [string last \n $chunk]] > 0) \ + && ($x + 1 != $len)} { + set chunk [string range $chunk 0 $x] + seek $fd [incr pos [expr {($x + 1) - $len}]] start + } + set chunk [ + $state(encoding) -mode decode -- $chunk] + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088] + # Go ahead, leave chunk alone + } + default { + error "Can't handle content encoding \"$state(encoding)\"" + } + } + append fragment $chunk + + set cc [expr {$options(-blocksize) - 1}] + while {[string length $fragment] > $options(-blocksize)} { + uplevel #0 $options(-command) [ + list data [string range $fragment 0 $cc]] + + set fragment [ + string range $fragment $options(-blocksize) end] + } + } + if {[string length $fragment] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + catch {close $fd} + } + + file/1 { + set fd [open $state(file) RDONLY] + + set code [catch { + fconfigure $fd -translation binary + + while {[string length [ + set fragment [read $fd $options(-blocksize)]]] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + catch {close $fd} + } + + parts/0 + - + parts/1 { + error {MIME part isn't a leaf} + } + + string/0 + - + string/1 { + switch -- $state(encoding)/$state(canonicalP) { + base64/0 + - + quoted-printable/0 { + set fragment [ + $state(encoding) -mode decode -- $state(string)] + } + + default { + # Not a bugfix for [#477088], but clarification + # This handles no-encoding, 7bit, 8bit, and binary. + set fragment $state(string) + } + } + + set code [catch { + set cc [expr {$options(-blocksize) -1}] + while {[string length $fragment] > $options(-blocksize)} { + uplevel #0 $options(-command) [ + list data [string range $fragment 0 $cc]] + + set fragment [ + string range $fragment $options(-blocksize) end] + } + if {[string length $fragment] > 0} { + uplevel #0 $options(-command) [list data $fragment] + } + } result] + set ecode $errorCode + set einfo $errorInfo + } + default { + error "Unknown combination \"$state(value)/$state(canonicalP)\"" + } + } + + set code [catch { + if {$code} { + uplevel #0 $options(-command) [list error $result] + } else { + uplevel #0 $options(-command) [list end] + } + } result] + set ecode $errorCode + set einfo $errorInfo + + if {$code} { + return -code $code -errorinfo $einfo -errorcode $ecode $result + } + + if {$decode} { + array set params [mime::getproperty $token params] + + if {[info exists params(charset)]} { + set charset $params(charset) + } else { + set charset US-ASCII + } + + set enc [reversemapencoding $charset] + if {$enc ne {}} { + set result [::encoding convertfrom $enc $result] + } else { + return -code error "-decode failed: can't reversemap charset $charset" + } + } + + return $result +} + +# ::mime::getbodyaux -- +# +# Builds up the body of the message, fragment by fragment. When +# the entire message has been retrieved, it is returned. +# +# Arguments: +# token The MIME token to parse. +# reason One of 'data', 'end', or 'error'. +# fragment The section of data data fragment to extract a +# string from. +# +# Results: +# Returns nothing, except when called with the 'end' argument +# in which case it returns a string that contains all of the +# data that 'getbodyaux' has been called with. Will throw an +# error if it is called with the reason of 'error'. + +proc ::mime::getbodyaux {token reason {fragment {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch $reason { + data { + append state(getbody) $fragment + return {} + } + + end { + if {[info exists state(getbody)]} { + set result $state(getbody) + unset state(getbody) + } else { + set result {} + } + + return $result + } + + error { + catch {unset state(getbody)} + error $reason + } + + default { + error "Unknown reason \"$reason\"" + } + } +} + +# ::mime::copymessage -- +# +# mime::copymessage copies the MIME part to the specified channel. +# +# mime::copymessage operates synchronously, and uses fileevent to +# allow asynchronous operations to proceed independently. +# +# Arguments: +# token The MIME token to parse. +# channel The channel to copy the message to. +# +# Results: +# Returns nothing unless an error is thrown while the message +# is being written to the channel. + +proc ::mime::copymessage {token channel} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set openP [info exists state(fd)] + + set code [catch {mime::copymessageaux $token $channel} result] + set ecode $errorCode + set einfo $errorInfo + + if {!$openP && [info exists state(fd)]} { + if {![info exists state(root)]} { + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::copymessageaux -- +# +# mime::copymessageaux copies the MIME part to the specified channel. +# +# Arguments: +# token The MIME token to parse. +# channel The channel to copy the message to. +# +# Results: +# Returns nothing unless an error is thrown while the message +# is being written to the channel. + +proc ::mime::copymessageaux {token channel} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + array set header $state(header) + + set result {} + foreach {mixed value} [getheader $token] { + puts $channel "$mixed: $value" + } + + set boundary $state(boundary) ;# computed by `getheader` + + set converter {} + set encoding {} + if {$state(value) ne {parts}} { + if {$state(canonicalP)} { + if {[set encoding $state(encoding)] eq {}} { + set encoding [encoding $token] + } + if {$encoding ne {}} { + puts $channel "Content-Transfer-Encoding: $encoding" + } + switch -- $encoding { + base64 + - + quoted-printable { + set converter $encoding + } + 7bit - 8bit - binary - {} { + # Bugfix for [#477088], also [#539952] + # Go ahead + } + default { + error "Can't handle content encoding \"$encoding\"" + } + } + } + } + + if {[info exists state(error)]} { + unset state(error) + } + + switch -- $state(value) { + file { + set closeP 1 + if {[info exists state(root)]} { + # FRINK: nocheck + variable $state(root) + upvar 0 $state(root) root + + if {[info exists root(fd)]} { + set fd $root(fd) + set closeP 0 + } else { + set fd [set state(fd) [open $state(file) RDONLY]] + } + set size $state(count) + } else { + set fd [set state(fd) [open $state(file) RDONLY]] + # read until eof + set size -1 + } + seek $fd $state(offset) start + if {$closeP} { + fconfigure $fd -translation binary + } + + puts $channel {} + + while {$size != 0 && ![eof $fd]} { + if {$size < 0 || $size > 32766} { + set X [read $fd 32766] + } else { + set X [read $fd $size] + } + if {$size > 0} { + set size [expr {$size - [string length $X]}] + } + if {$converter eq {}} { + puts -nonewline $channel $X + } else { + puts -nonewline $channel [$converter -mode encode -- $X] + } + } + + if {$closeP} { + catch {close $state(fd)} + unset state(fd) + } + } + + parts { + if { + ![info exists state(root)] + && + [info exists state(file)] + } { + set state(fd) [open $state(file) RDONLY] + fconfigure $state(fd) -translation binary + } + + switch -glob -- $state(content) { + message/* { + puts $channel {} + foreach part $state(parts) { + mime::copymessage $part $channel + break + } + } + + default { + # Note RFC 2046: See buildmessageaux for details. + # + # The boundary delimiter MUST occur at the + # beginning of a line, i.e., following a CRLF, and + # the initial CRLF is considered to be attached to + # the boundary delimiter line rather than part of + # the preceding part. + # + # - The above means that the CRLF before $boundary + # is needed per the RFC, and the parts must not + # have a closing CRLF of their own. See Tcllib bug + # 1213527, and patch 1254934 for the problems when + # both file/string branches added CRLF after the + # body parts. + + + foreach part $state(parts) { + puts $channel \n--$boundary + mime::copymessage $part $channel + } + puts $channel \n--$boundary-- + } + } + + if {[info exists state(fd)]} { + catch {close $state(fd)} + unset state(fd) + } + } + + string { + if {[catch {fconfigure $channel -buffersize} blocksize]} { + set blocksize 4096 + } elseif {$blocksize < 512} { + set blocksize 512 + } + set blocksize [expr {($blocksize / 4) * 3}] + + # [893516] + fconfigure $channel -buffersize $blocksize + + puts $channel {} + + #TODO: tests don't cover these paths + if {$converter eq {}} { + puts -nonewline $channel $state(string) + } else { + puts -nonewline $channel [$converter -mode encode -- $state(string)] + } + } + default { + error "Unknown value \"$state(value)\"" + } + } + + flush $channel + + if {[info exists state(error)]} { + error $state(error) + } +} + +# ::mime::buildmessage -- +# +# Like copymessage, but produces a string rather than writing the message into a channel. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# The message. + +proc ::mime::buildmessage token { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + + set openP [info exists state(fd)] + + set code [catch {mime::buildmessageaux $token} result] + if {![info exists errorCode]} { + set ecode {} + } else { + set ecode $errorCode + } + set einfo $errorInfo + + if {!$openP && [info exists state(fd)]} { + if {![info exists state(root)]} { + catch {close $state(fd)} + } + unset state(fd) + } + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + + +proc ::mime::buildmessageaux token { + set chan [tcl::chan::memchan] + chan configure $chan -translation crlf + copymessageaux $token $chan + seek $chan 0 + chan configure $chan -translation binary + set res [read $chan] + close $chan + return $res +} + +# ::mime::encoding -- +# +# Determines how a token is encoded. +# +# Arguments: +# token The MIME token to parse. +# +# Results: +# Returns the encoding of the message (the null string, base64, +# or quoted-printable). + +proc ::mime::encoding {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -glob -- $state(content) { + audio/* + - + image/* + - + video/* { + return base64 + } + + message/* + - + multipart/* { + return {} + } + default {# Skip} + } + + set asciiP 1 + set lineP 1 + switch -- $state(value) { + file { + set fd [open $state(file) RDONLY] + fconfigure $fd -translation binary + + while {[gets $fd line] >= 0} { + if {$asciiP} { + set asciiP [encodingasciiP $line] + } + if {$lineP} { + set lineP [encodinglineP $line] + } + if {(!$asciiP) && (!$lineP)} { + break + } + } + + catch {close $fd} + } + + parts { + return {} + } + + string { + foreach line [split $state(string) "\n"] { + if {$asciiP} { + set asciiP [encodingasciiP $line] + } + if {$lineP} { + set lineP [encodinglineP $line] + } + if {(!$asciiP) && (!$lineP)} { + break + } + } + } + default { + error "Unknown value \"$state(value)\"" + } + } + + switch -glob -- $state(content) { + text/* { + if {!$asciiP} { + #TODO: this path is not covered by tests + foreach {k v} $state(params) { + if {$k eq "charset"} { + set v [string tolower $v] + if {($v ne "us-ascii") \ + && (![string match {iso-8859-[1-8]} $v])} { + return base64 + } + + break + } + } + } + + if {!$lineP} { + return quoted-printable + } + } + + + default { + if {(!$asciiP) || (!$lineP)} { + return base64 + } + } + } + + return {} +} + +# ::mime::encodingasciiP -- +# +# Checks if a string is a pure ascii string, or if it has a non-standard +# form. +# +# Arguments: +# line The line to check. +# +# Results: +# Returns 1 if \r only occurs at the end of lines, and if all +# characters in the line are between the ASCII codes of 32 and 126. + +proc ::mime::encodingasciiP {line} { + foreach c [split $line {}] { + switch -- $c { + { } - \t - \r - \n { + } + + default { + binary scan $c c c + if {($c < 32) || ($c > 126)} { + return 0 + } + } + } + } + if { + [set r [string first \r $line]] < 0 + || + $r == {[string length $line] - 1} + } { + return 1 + } + + return 0 +} + +# ::mime::encodinglineP -- +# +# Checks if a string is a line is valid to be processed. +# +# Arguments: +# line The line to check. +# +# Results: +# Returns 1 the line is less than 76 characters long, the line +# contains more characters than just whitespace, the line does +# not start with a '.', and the line does not start with 'From '. + +proc ::mime::encodinglineP {line} { + if {([string length $line] > 76) \ + || ($line ne [string trimright $line]) \ + || ([string first . $line] == 0) \ + || ([string first {From } $line] == 0)} { + return 0 + } + + return 1 +} + +# ::mime::fcopy -- +# +# Appears to be unused. +# +# Arguments: +# +# Results: +# + +proc ::mime::fcopy {token count {error {}}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {$error ne {}} { + set state(error) $error + } + set state(doneP) 1 +} + +# ::mime::scopy -- +# +# Copy a portion of the contents of a mime token to a channel. +# +# Arguments: +# token The token containing the data to copy. +# channel The channel to write the data to. +# offset The location in the string to start copying +# from. +# len The amount of data to write. +# blocksize The block size for the write operation. +# +# Results: +# The specified portion of the string in the mime token is +# copied to the specified channel. + +proc ::mime::scopy {token channel offset len blocksize} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {$len <= 0} { + set state(doneP) 1 + fileevent $channel writable {} + return + } + + if {[set cc $len] > $blocksize} { + set cc $blocksize + } + + if {[catch { + puts -nonewline $channel [ + string range $state(string) $offset [expr {$offset + $cc - 1}]] + fileevent $channel writable [ + list mime::scopy $token $channel [ + incr offset $cc] [incr len -$cc] $blocksize] + } result] + } { + set state(error) $result + set state(doneP) 1 + fileevent $channel writable {} + } + return +} + +# ::mime::qp_encode -- +# +# Tcl version of quote-printable encode +# +# Arguments: +# string The string to quote. +# encoded_word Boolean value to determine whether or not encoded words +# (RFC 2047) should be handled or not. (optional) +# +# Results: +# The properly quoted string is returned. + +proc ::mime::qp_encode {string {encoded_word 0} {no_softbreak 0}} { + # 8.1+ improved string manipulation routines used. + # Replace outlying characters, characters that would normally + # be munged by EBCDIC gateways, and special Tcl characters "[\]{} + # with =xx sequence + + if {$encoded_word} { + # Special processing for encoded words (RFC 2047) + set regexp {[\x00-\x08\x0B-\x1E\x21-\x24\x3D\x40\x5B-\x5E\x60\x7B-\xFF\x09\x5F\x3F]} + lappend mapChars { } _ + } else { + set regexp {[\x00-\x08\x0B-\x1E\x21-\x24\x3D\x40\x5B-\x5E\x60\x7B-\xFF]} + } + regsub -all -- $regexp $string {[format =%02X [scan "\\&" %c]]} string + + # Replace the format commands with their result + + set string [subst -novariables $string] + + # soft/hard newlines and other + # Funky cases for SMTP compatibility + lappend mapChars " \n" =20\n \t\n =09\n \n\.\n =2E\n "\nFrom " "\n=46rom " + + set string [string map $mapChars $string] + + # Break long lines - ugh + + # Implementation of FR #503336 + if {$no_softbreak} { + set result $string + } else { + set result {} + foreach line [split $string \n] { + while {[string length $line] > 72} { + set chunk [string range $line 0 72] + if {[regexp -- (=|=.)$ $chunk dummy end]} { + + # Don't break in the middle of a code + + set len [expr {72 - [string length $end]}] + set chunk [string range $line 0 $len] + incr len + set line [string range $line $len end] + } else { + set line [string range $line 73 end] + } + append result $chunk=\n + } + append result $line\n + } + + # Trim off last \n, since the above code has the side-effect + # of adding an extra \n to the encoded string and return the + # result. + set result [string range $result 0 end-1] + } + + # If the string ends in space or tab, replace with =xx + + set lastChar [string index $result end] + if {$lastChar eq { }} { + set result [string replace $result end end =20] + } elseif {$lastChar eq "\t"} { + set result [string replace $result end end =09] + } + + return $result +} + +# ::mime::qp_decode -- +# +# Tcl version of quote-printable decode +# +# Arguments: +# string The quoted-printable string to decode. +# encoded_word Boolean value to determine whether or not encoded words +# (RFC 2047) should be handled or not. (optional) +# +# Results: +# The decoded string is returned. + +proc ::mime::qp_decode {string {encoded_word 0}} { + # 8.1+ improved string manipulation routines used. + # Special processing for encoded words (RFC 2047) + + if {$encoded_word} { + # _ == \x20, even if SPACE occupies a different code position + set string [string map [list _ \u0020] $string] + } + + # smash the white-space at the ends of lines since that must've been + # generated by an MUA. + + regsub -all -- {[ \t]+\n} $string \n string + set string [string trimright $string " \t"] + + # Protect the backslash for later subst and + # smash soft newlines, has to occur after white-space smash + # and any encoded word modification. + + #TODO: codepath not tested + set string [string map [list \\ {\\} =\n {}] $string] + + # Decode specials + + regsub -all -nocase {=([a-f0-9][a-f0-9])} $string {\\u00\1} string + + # process \u unicode mapped chars + + return [subst -novariables -nocommands $string] +} + +# ::mime::parseaddress -- +# +# This was originally written circa 1982 in C. we're still using it +# because it recognizes virtually every buggy address syntax ever +# generated! +# +# mime::parseaddress takes a string containing one or more 822-style +# address specifications and returns a list of dictionaries, for each +# address specified in the argument. +# +# Each dictionary contains these properties: +# +# property value +# ======== ===== +# address local@domain +# comment 822-style comment +# domain the domain part (rhs) +# error non-empty on a parse error +# group this address begins a group +# friendly user-friendly rendering +# local the local part (lhs) +# memberP this address belongs to a group +# phrase the phrase part +# proper 822-style address specification +# route 822-style route specification (obsolete) +# +# Note that one or more of these properties may be empty. +# +# Arguments: +# string The address string to parse +# +# Results: +# Returns a list of dictionaries, one element for each address +# specified in the argument. + +proc ::mime::parseaddress {string} { + global errorCode errorInfo + + variable mime + + set token [namespace current]::[incr mime(uid)] + # FRINK: nocheck + variable $token + upvar 0 $token state + + set code [catch {mime::parseaddressaux $token $string} result] + set ecode $errorCode + set einfo $errorInfo + + foreach name [array names state] { + unset state($name) + } + # FRINK: nocheck + catch {unset $token} + + return -code $code -errorinfo $einfo -errorcode $ecode $result +} + +# ::mime::parseaddressaux -- +# +# This was originally written circa 1982 in C. we're still using it +# because it recognizes virtually every buggy address syntax ever +# generated! +# +# mime::parseaddressaux does the actually parsing for mime::parseaddress +# +# Each dictionary contains these properties: +# +# property value +# ======== ===== +# address local@domain +# comment 822-style comment +# domain the domain part (rhs) +# error non-empty on a parse error +# group this address begins a group +# friendly user-friendly rendering +# local the local part (lhs) +# memberP this address belongs to a group +# phrase the phrase part +# proper 822-style address specification +# route 822-style route specification (obsolete) +# +# Note that one or more of these properties may be empty. +# +# Arguments: +# token The MIME token to work from. +# string The address string to parse +# +# Results: +# Returns a list of dictionaries, one for each address specified in the +# argument. + +proc ::mime::parseaddressaux {token string} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + variable addrtokenL + variable addrlexemeL + + set state(input) $string + set state(glevel) 0 + set state(buffer) {} + set state(lastC) LX_END + set state(tokenL) $addrtokenL + set state(lexemeL) $addrlexemeL + + set result {} + while {[addr_next $token]} { + if {[set tail $state(domain)] ne {}} { + set tail @$state(domain) + } else { + set tail @[info hostname] + } + if {[set address $state(local)] ne {}} { + #TODO: this path is not covered by tests + append address $tail + } + + if {$state(phrase) ne {}} { + #TODO: this path is not covered by tests + set state(phrase) [string trim $state(phrase) \"] + foreach t $state(tokenL) { + if {[string first $t $state(phrase)] >= 0} { + #TODO: is this quoting robust enough? + set state(phrase) \"$state(phrase)\" + break + } + } + + set proper "$state(phrase) <$address>" + } else { + set proper $address + } + + if {[set friendly $state(phrase)] eq {}} { + #TODO: this path is not covered by tests + if {[set note $state(comment)] ne {}} { + if {[string first ( $note] == 0} { + set note [string trimleft [string range $note 1 end]] + } + if { + [string last ) $note] + == [set len [expr {[string length $note] - 1}]] + } { + set note [string range $note 0 [expr {$len - 1}]] + } + set friendly $note + } + + if { + $friendly eq {} + && + [set mbox $state(local)] ne {} + } { + #TODO: this path is not covered by tests + set mbox [string trim $mbox \"] + + if {[string first / $mbox] != 0} { + set friendly $mbox + } elseif {[set friendly [addr_x400 $mbox PN]] ne {}} { + } elseif { + [set friendly [addr_x400 $mbox S]] ne {} + && + [set g [addr_x400 $mbox G]] ne {} + } { + set friendly "$g $friendly" + } + + if {$friendly eq {}} { + set friendly $mbox + } + } + } + set friendly [string trim $friendly \"] + + lappend result [list address $address \ + comment $state(comment) \ + domain $state(domain) \ + error $state(error) \ + friendly $friendly \ + group $state(group) \ + local $state(local) \ + memberP $state(memberP) \ + phrase $state(phrase) \ + proper $proper \ + route $state(route)] + + } + + unset {*}{ + state(input) + state(glevel) + state(buffer) + state(lastC) + state(tokenL) + state(lexemeL) + } + + return $result +} + +# ::mime::addr_next -- +# +# Locate the next address in a mime token. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_next {token} { + global errorCode errorInfo + # FRINK: nocheck + variable $token + upvar 0 $token state + set nocomplain [package vsatisfies [package provide Tcl] 8.4] + foreach prop {comment domain error group local memberP phrase route} { + if {$nocomplain} { + unset -nocomplain state($prop) + } else { + if {[catch {unset state($prop)}]} {set ::errorInfo {}} + } + } + + switch -- [set code [catch {mime::addr_specification $token} result]] { + 0 { + if {!$result} { + return 0 + } + + switch -- $state(lastC) { + LX_COMMA + - + LX_END { + } + default { + # catch trailing comments... + set lookahead $state(input) + mime::parselexeme $token + set state(input) $lookahead + } + } + } + + 7 { + set state(error) $result + + while {1} { + switch -- $state(lastC) { + LX_COMMA + - + LX_END { + break + } + + default { + mime::parselexeme $token + } + } + } + } + + default { + set ecode $errorCode + set einfo $errorInfo + + return -code $code -errorinfo $einfo -errorcode $ecode $result + } + } + + foreach prop {comment domain error group local memberP phrase route} { + if {![info exists state($prop)]} { + set state($prop) {} + } + } + + return 1 +} + +# ::mime::addr_specification -- +# +# Uses lookahead parsing to determine whether there is another +# valid e-mail address or not. Throws errors if unrecognized +# or invalid e-mail address syntax is used. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_specification {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set lookahead $state(input) + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + set state(phrase) $state(buffer) + } + + LX_SEMICOLON { + if {[incr state(glevel) -1] < 0} { + return -code 7 "extraneous semi-colon" + } + + catch {unset state(comment)} + return [addr_specification $token] + } + + LX_COMMA { + catch {unset state(comment)} + return [addr_specification $token] + } + + LX_END { + return 0 + } + + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_ATSIGN { + set state(input) $lookahead + return [addr_routeaddr $token 0] + } + + default { + return -code 7 [ + format "unexpected character at beginning (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(phrase) " " $state(buffer) + + return [addr_phrase $token] + } + + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_COLON { + return [addr_group $token] + } + + LX_DOT { + set state(local) "$state(phrase)$state(buffer)" + unset state(phrase) + mime::addr_routeaddr $token 0 + mime::addr_end $token + } + + LX_ATSIGN { + set state(memberP) $state(glevel) + set state(local) $state(phrase) + unset state(phrase) + mime::addr_domain $token + mime::addr_end $token + } + + LX_SEMICOLON + - + LX_COMMA + - + LX_END { + set state(memberP) $state(glevel) + if { + $state(lastC) eq "LX_SEMICOLON" + && + ([incr state(glevel) -1] < 0) + } { + #TODO: this path is not covered by tests + return -code 7 "extraneous semi-colon" + } + + set state(local) $state(phrase) + unset state(phrase) + } + + default { + return -code 7 [ + format "expecting mailbox (found %s)" $state(buffer)] + } + } + + return 1 +} + +# ::mime::addr_routeaddr -- +# +# Parses the domain portion of an e-mail address. Finds the '@' +# sign and then calls mime::addr_route to verify the domain. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns 1 if there is another address, and 0 if there is not. + +proc ::mime::addr_routeaddr {token {checkP 1}} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set lookahead $state(input) + if {[parselexeme $token] eq "LX_ATSIGN"} { + #TODO: this path is not covered by tests + mime::addr_route $token + } else { + set state(input) $lookahead + } + + mime::addr_local $token + + switch -- $state(lastC) { + LX_ATSIGN { + mime::addr_domain $token + } + + LX_SEMICOLON + - + LX_RBRACKET + - + LX_COMMA + - + LX_END { + } + + default { + return -code 7 [ + format "expecting at-sign after local-part (found %s)" \ + $state(buffer)] + } + } + + if {($checkP) && ($state(lastC) ne "LX_RBRACKET")} { + return -code 7 [ + format "expecting right-bracket (found %s)" $state(buffer)] + } + + return 1 +} + +# ::mime::addr_route -- +# +# Attempts to parse the portion of the e-mail address after the @. +# Tries to verify that the domain definition has a valid form. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_route {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(route) @ + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_DLITERAL { + append state(route) $state(buffer) + } + + default { + return -code 7 \ + [format "expecting sub-route in route-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_COMMA { + append state(route) $state(buffer) + while 1 { + switch -- [parselexeme $token] { + LX_COMMA { + } + + LX_ATSIGN { + append state(route) $state(buffer) + break + } + + default { + return -code 7 \ + [format "expecting at-sign in route (found %s)" \ + $state(buffer)] + } + } + } + } + + LX_ATSIGN + - + LX_DOT { + append state(route) $state(buffer) + } + + LX_COLON { + append state(route) $state(buffer) + return + } + + default { + return -code 7 [ + format "expecting colon to terminate route (found %s)" \ + $state(buffer)] + } + } + } +} + +# ::mime::addr_domain -- +# +# Attempts to parse the portion of the e-mail address after the @. +# Tries to verify that the domain definition has a valid form. +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_domain {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_DLITERAL { + append state(domain) $state(buffer) + } + + default { + return -code 7 [ + format "expecting sub-domain in domain-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_DOT { + append state(domain) $state(buffer) + } + + LX_ATSIGN { + append state(local) % $state(domain) + unset state(domain) + } + + default { + return + } + } + } +} + +# ::mime::addr_local -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_local {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(memberP) $state(glevel) + + while 1 { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(local) $state(buffer) + } + + default { + return -code 7 \ + [format "expecting mailbox in local-part (found %s)" \ + $state(buffer)] + } + } + + switch -- [parselexeme $token] { + LX_DOT { + append state(local) $state(buffer) + } + + default { + return + } + } + } +} + +# ::mime::addr_phrase -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + + +proc ::mime::addr_phrase {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + while {1} { + switch -- [parselexeme $token] { + LX_ATOM + - + LX_QSTRING { + append state(phrase) " " $state(buffer) + } + + default { + break + } + } + } + + switch -- $state(lastC) { + LX_LBRACKET { + return [addr_routeaddr $token] + } + + LX_COLON { + return [addr_group $token] + } + + LX_DOT { + append state(phrase) $state(buffer) + return [addr_phrase $token] + } + + default { + return -code 7 [ + format "found phrase instead of mailbox (%s%s)" \ + $state(phrase) $state(buffer)] + } + } +} + +# ::mime::addr_group -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_group {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + if {[incr state(glevel)] > 1} { + return -code 7 [ + format "nested groups not allowed (found %s)" $state(phrase)] + } + + set state(group) $state(phrase) + unset state(phrase) + + set lookahead $state(input) + while 1 { + switch -- [parselexeme $token] { + LX_SEMICOLON + - + LX_END { + set state(glevel) 0 + return 1 + } + + LX_COMMA { + } + + default { + set state(input) $lookahead + return [addr_specification $token] + } + } + } +} + +# ::mime::addr_end -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_end {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + switch -- $state(lastC) { + LX_SEMICOLON { + if {[incr state(glevel) -1] < 0} { + return -code 7 "extraneous semi-colon" + } + } + + LX_COMMA + - + LX_END { + } + + default { + return -code 7 [ + format "junk after local@domain (found %s)" $state(buffer)] + } + } +} + +# ::mime::addr_x400 -- +# +# +# Arguments: +# token The MIME token to work from. +# +# Results: +# Returns nothing if successful, and throws an error if invalid +# syntax is found. + +proc ::mime::addr_x400 {mbox key} { + if {[set x [string first /$key= [string toupper $mbox]]] < 0} { + return {} + } + set mbox [string range $mbox [expr {$x + [string length $key] + 2}] end] + + if {[set x [string first / $mbox]] > 0} { + set mbox [string range $mbox 0 [expr {$x - 1}]] + } + + return [string trim $mbox \"] +} + +# ::mime::parsedatetime -- +# +# Fortunately the clock command in the Tcl 8.x core does all the heavy +# lifting for us (except for timezone calculations). +# +# mime::parsedatetime takes a string containing an 822-style date-time +# specification and returns the specified property. +# +# The list of properties and their ranges are: +# +# property range +# ======== ===== +# clock raw result of "clock scan" +# hour 0 .. 23 +# lmonth January, February, ..., December +# lweekday Sunday, Monday, ... Saturday +# mday 1 .. 31 +# min 0 .. 59 +# mon 1 .. 12 +# month Jan, Feb, ..., Dec +# proper 822-style date-time specification +# rclock elapsed seconds between then and now +# sec 0 .. 59 +# wday 0 .. 6 (Sun .. Mon) +# weekday Sun, Mon, ..., Sat +# yday 1 .. 366 +# year 1900 ... +# zone -720 .. 720 (minutes east of GMT) +# +# Arguments: +# value Either a 822-style date-time specification or '-now' +# if the current date/time should be used. +# property The property (from the list above) to return +# +# Results: +# Returns the string value of the 'property' for the date/time that was +# specified in 'value'. + +namespace eval ::mime { + variable WDAYS_SHORT [list Sun Mon Tue Wed Thu Fri Sat] + variable WDAYS_LONG [list Sunday Monday Tuesday Wednesday Thursday \ + Friday Saturday] + + # Counting months starts at 1, so just insert a dummy element + # at index 0. + variable MONTHS_SHORT [list {} \ + Jan Feb Mar Apr May Jun \ + Jul Aug Sep Oct Nov Dec] + variable MONTHS_LONG [list {} \ + January February March April May June July \ + August Sepember October November December] +} +proc ::mime::parsedatetime {value property} { + if {$value eq "-now"} { + set clock [clock seconds] + } elseif {[regexp {^(.*) ([+-])([0-9][0-9])([0-9][0-9])$} $value \ + -> value zone_sign zone_hour zone_min] + } { + set clock [clock scan $value -gmt 1] + if {[info exists zone_min]} { + set zone_min [scan $zone_min %d] + set zone_hour [scan $zone_hour %d] + set zone [expr {60 * ($zone_min + 60 * $zone_hour)}] + if {$zone_sign eq "+"} { + set zone -$zone + } + incr clock $zone + } + } else { + set clock [clock scan $value] + } + + switch -- $property { + clock { + return $clock + } + + hour { + set value [clock format $clock -format %H] + } + + lmonth { + variable MONTHS_LONG + return [lindex $MONTHS_LONG \ + [scan [clock format $clock -format %m] %d]] + } + + lweekday { + variable WDAYS_LONG + return [lindex $WDAYS_LONG [clock format $clock -format %w]] + } + + mday { + set value [clock format $clock -format %d] + } + + min { + set value [clock format $clock -format %M] + } + + mon { + set value [clock format $clock -format %m] + } + + month { + variable MONTHS_SHORT + return [lindex $MONTHS_SHORT [ + scan [clock format $clock -format %m] %d]] + } + + proper { + set gmt [clock format $clock -format "%Y-%m-%d %H:%M:%S" -gmt true] + if {[set diff [expr {($clock-[clock scan $gmt]) / 60}]] < 0} { + set s - + set diff [expr {-($diff)}] + } else { + set s + + } + set zone [format %s%02d%02d $s [ + expr {$diff / 60}] [expr {$diff % 60}]] + + variable WDAYS_SHORT + set wday [lindex $WDAYS_SHORT [clock format $clock -format %w]] + variable MONTHS_SHORT + set mon [lindex $MONTHS_SHORT [ + scan [clock format $clock -format %m] %d]] + + return [ + clock format $clock -format "$wday, %d $mon %Y %H:%M:%S $zone"] + } + + rclock { + #TODO: these paths are not covered by tests + if {$value eq "-now"} { + return 0 + } else { + return [expr {[clock seconds] - $clock}] + } + } + + sec { + set value [clock format $clock -format %S] + } + + wday { + return [clock format $clock -format %w] + } + + weekday { + variable WDAYS_SHORT + return [lindex $WDAYS_SHORT [clock format $clock -format %w]] + } + + yday { + set value [clock format $clock -format %j] + } + + year { + set value [clock format $clock -format %Y] + } + + zone { + set value [string trim [string map [list \t { }] $value]] + if {[set x [string last { } $value]] < 0} { + return 0 + } + set value [string range $value [expr {$x + 1}] end] + switch -- [set s [string index $value 0]] { + + - - { + if {$s eq "+"} { + #TODO: This path is not covered by tests + set s {} + } + set value [string trim [string range $value 1 end]] + if {( + [string length $value] != 4) + || + [scan $value %2d%2d h m] != 2 + || + $h > 12 + || + $m > 59 + || + ($h == 12 && $m > 0) + } { + error "malformed timezone-specification: $value" + } + set value $s[expr {$h * 60 + $m}] + } + + default { + set value [string toupper $value] + set z1 [list UT GMT EST EDT CST CDT MST MDT PST PDT] + set z2 [list 0 0 -5 -4 -6 -5 -7 -6 -8 -7] + if {[set x [lsearch -exact $z1 $value]] < 0} { + error "unrecognized timezone-mnemonic: $value" + } + set value [expr {[lindex $z2 $x] * 60}] + } + } + } + + date2gmt + - + date2local + - + dst + - + sday + - + szone + - + tzone + - + default { + error "unknown property $property" + } + } + + if {[set value [string trimleft $value 0]] eq {}} { + #TODO: this path is not covered by tests + set value 0 + } + return $value +} + +# ::mime::uniqueID -- +# +# Used to generate a 'globally unique identifier' for the content-id. +# The id is built from the pid, the current time, the hostname, and +# a counter that is incremented each time a message is sent. +# +# Arguments: +# +# Results: +# Returns the a string that contains the globally unique identifier +# that should be used for the Content-ID of an e-mail message. + +proc ::mime::uniqueID {} { + variable mime + + return <[pid].[clock seconds].[incr mime(cid)]@[info hostname]> +} + +# ::mime::parselexeme -- +# +# Used to implement a lookahead parser. +# +# Arguments: +# token The MIME token to operate on. +# +# Results: +# Returns the next token found by the parser. + +proc ::mime::parselexeme {token} { + # FRINK: nocheck + variable $token + upvar 0 $token state + + set state(input) [string trimleft $state(input)] + + set state(buffer) {} + if {$state(input) eq {}} { + set state(buffer) end-of-input + return [set state(lastC) LX_END] + } + + set c [string index $state(input) 0] + set state(input) [string range $state(input) 1 end] + + if {$c eq "("} { + set noteP 0 + set quoteP 0 + + while 1 { + append state(buffer) $c + + #TODO: some of these paths are not covered by tests + switch -- $c/$quoteP { + (/0 { + incr noteP + } + + \\/0 { + set quoteP 1 + } + + )/0 { + if {[incr noteP -1] < 1} { + if {[info exists state(comment)]} { + append state(comment) { } + } + append state(comment) $state(buffer) + + return [parselexeme $token] + } + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during comment" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {$c eq "\""} { + set firstP 1 + set quoteP 0 + + while 1 { + append state(buffer) $c + + switch -- $c/$quoteP { + "\\/0" { + set quoteP 1 + } + + "\"/0" { + if {!$firstP} { + return [set state(lastC) LX_QSTRING] + } + set firstP 0 + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during quoted-string" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {$c eq {[}} { + set quoteP 0 + + while 1 { + append state(buffer) $c + + switch -- $c/$quoteP { + \\/0 { + set quoteP 1 + } + + ]/0 { + return [set state(lastC) LX_DLITERAL] + } + + default { + set quoteP 0 + } + } + + if {[set c [string index $state(input) 0]] eq {}} { + set state(buffer) "end-of-input during domain-literal" + return [set state(lastC) LX_ERR] + } + set state(input) [string range $state(input) 1 end] + } + } + + if {[set x [lsearch -exact $state(tokenL) $c]] >= 0} { + append state(buffer) $c + + return [set state(lastC) [lindex $state(lexemeL) $x]] + } + + while 1 { + append state(buffer) $c + + switch -- [set c [string index $state(input) 0]] { + {} - " " - "\t" - "\n" { + break + } + + default { + if {[lsearch -exact $state(tokenL) $c] >= 0} { + break + } + } + } + + set state(input) [string range $state(input) 1 end] + } + + return [set state(lastC) LX_ATOM] +} + +# ::mime::mapencoding -- +# +# mime::mapencodings maps tcl encodings onto the proper names for their +# MIME charset type. This is only done for encodings whose charset types +# were known. The remaining encodings return {} for now. +# +# Arguments: +# enc The tcl encoding to map. +# +# Results: +# Returns the MIME charset type for the specified tcl encoding, or {} +# if none is known. + +proc ::mime::mapencoding {enc} { + + variable encodings + + if {[info exists encodings($enc)]} { + return $encodings($enc) + } + return {} +} + +# ::mime::reversemapencoding -- +# +# mime::reversemapencodings maps MIME charset types onto tcl encoding names. +# Those that are unknown return {}. +# +# Arguments: +# mimeType The MIME charset to convert into a tcl encoding type. +# +# Results: +# Returns the tcl encoding name for the specified mime charset, or {} +# if none is known. + +proc ::mime::reversemapencoding {mimeType} { + + variable reversemap + + set lmimeType [string tolower $mimeType] + if {[info exists reversemap($lmimeType)]} { + return $reversemap($lmimeType) + } + return {} +} + +# ::mime::word_encode -- +# +# Word encodes strings as per RFC 2047. +# +# Arguments: +# charset The character set to encode the message to. +# method The encoding method (base64 or quoted-printable). +# string The string to encode. +# ?-charset_encoded 0 or 1 Whether the data is already encoded +# in the specified charset (default 1) +# ?-maxlength maxlength The maximum length of each encoded +# word to return (default 66) +# +# Results: +# Returns a word encoded string. + +proc ::mime::word_encode {charset method string {args}} { + + variable encodings + + if {![info exists encodings($charset)]} { + error "unknown charset '$charset'" + } + + if {$encodings($charset) eq {}} { + error "invalid charset '$charset'" + } + + if {$method ne "base64" && $method ne "quoted-printable"} { + error "unknown method '$method', must be base64 or quoted-printable" + } + + # default to encoded and a length that won't make the Subject header to long + array set options [list -charset_encoded 1 -maxlength 66] + array set options $args + + if {$options(-charset_encoded)} { + set unencoded_string [::encoding convertfrom $charset $string] + } else { + set unencoded_string $string + } + + set string_length [string length $unencoded_string] + + if {!$string_length} { + return {} + } + + set string_bytelength [string bytelength $unencoded_string] + + # the 7 is for =?, ?Q?, ?= delimiters of the encoded word + set maxlength [expr {$options(-maxlength) - [string length $encodings($charset)] - 7}] + switch -exact -- $method { + base64 { + if {$maxlength < 4} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + set count 0 + set maxlength [expr {($maxlength / 4) * 3}] + while {$count < $string_length} { + set length 0 + set enc_string {} + while {$length < $maxlength && $count < $string_length} { + set char [string range $unencoded_string $count $count] + set enc_char [::encoding convertto $charset $char] + if {$length + [string length $enc_char] > $maxlength} { + set length $maxlength + } else { + append enc_string $enc_char + incr count + incr length [string length $enc_char] + } + } + set encoded_word [string map [ + list \n {}] [base64 -mode encode -- $enc_string]] + append result "=?$encodings($charset)?B?$encoded_word?=\n " + } + # Trim off last "\n ", since the above code has the side-effect + # of adding an extra "\n " to the encoded string. + + set result [string range $result 0 end-2] + } + quoted-printable { + if {$maxlength < 1} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + set count 0 + while {$count < $string_length} { + set length 0 + set encoded_word {} + while {$length < $maxlength && $count < $string_length} { + set char [string range $unencoded_string $count $count] + set enc_char [::encoding convertto $charset $char] + set qp_enc_char [qp_encode $enc_char 1] + set qp_enc_char_length [string length $qp_enc_char] + if {$qp_enc_char_length > $maxlength} { + error "maxlength $options(-maxlength) too short for chosen charset and encoding" + } + if { + $length + [string length $qp_enc_char] > $maxlength + } { + set length $maxlength + } else { + append encoded_word $qp_enc_char + incr count + incr length [string length $qp_enc_char] + } + } + append result "=?$encodings($charset)?Q?$encoded_word?=\n " + } + # Trim off last "\n ", since the above code has the side-effect + # of adding an extra "\n " to the encoded string. + + set result [string range $result 0 end-2] + } + {} { + # Go ahead + } + default { + error "Can't handle content encoding \"$method\"" + } + } + return $result +} + +# ::mime::word_decode -- +# +# Word decodes strings that have been word encoded as per RFC 2047. +# +# Arguments: +# encoded The word encoded string to decode. +# +# Results: +# Returns the string that has been decoded from the encoded message. + +proc ::mime::word_decode {encoded} { + + variable reversemap + + if {[regexp -- {=\?([^?]+)\?(.)\?([^?]*)\?=} $encoded \ + - charset method string] != 1 + } { + error "malformed word-encoded expression '$encoded'" + } + + set enc [reversemapencoding $charset] + if {$enc eq {}} { + error "unknown charset '$charset'" + } + + switch -exact -- $method { + b - + B { + set method base64 + } + q - + Q { + set method quoted-printable + } + default { + error "unknown method '$method', must be B or Q" + } + } + + switch -exact -- $method { + base64 { + set result [base64 -mode decode -- $string] + } + quoted-printable { + set result [qp_decode $string 1] + } + {} { + # Go ahead + } + default { + error "Can't handle content encoding \"$method\"" + } + } + + return [list $enc $method $result] +} + +# ::mime::field_decode -- +# +# Word decodes strings that have been word encoded as per RFC 2047 +# and converts the string from the original encoding/charset to UTF. +# +# Arguments: +# field The string to decode +# +# Results: +# Returns the decoded string in UTF. + +proc ::mime::field_decode {field} { + # ::mime::field_decode is broken. Here's a new version. + # This code is in the public domain. Don Libes + + # Step through a field for mime-encoded words, building a new + # version with unencoded equivalents. + + # Sorry about the grotesque regexp. Most of it is sensible. One + # notable fudge: the final $ is needed because of an apparent bug + # in the regexp engine where the preceding .* otherwise becomes + # non-greedy - perhaps because of the earlier ".*?", sigh. + + while {[regexp {(.*?)(=\?(?:[^?]+)\?(?:.)\?(?:[^?]*)\?=)(.*)$} $field \ + ignore prefix encoded field] + } { + # don't allow whitespace between encoded words per RFC 2047 + if {{} ne $prefix} { + if {![string is space $prefix]} { + append result $prefix + } + } + + set decoded [word_decode $encoded] + foreach {charset - string} $decoded break + + append result [::encoding convertfrom $charset $string] + } + append result $field + return $result +} + +## One-Shot Initialization + +::apply {{} { + variable encList + variable encAliasList + variable reversemap + + foreach {enc mimeType} $encList { + if {$mimeType eq {}} continue + set reversemap([string tolower $mimeType]) $enc + } + + foreach {enc mimeType} $encAliasList { + set reversemap([string tolower $mimeType]) $enc + } + + # Drop the helper variables + unset encList encAliasList + +} ::mime} + + +variable ::mime::internal 0 diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.5.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.5.tm new file mode 100644 index 00000000..0e4260b8 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.5.tm @@ -0,0 +1,1894 @@ +#! /usr/bin/env tclsh + + +package require flagfilter +namespace import ::flagfilter::check_flags + +namespace eval natsort { + proc scriptdir {} { + set possibly_linked_script [file dirname [file normalize [file join [info script] ...]]] + if {[file isdirectory $possibly_linked_script]} { + return $possibly_linked_script + } else { + return [file dirname $possibly_linked_script] + } + } + tcl::tm::add [scriptdir] +} + + +namespace eval natsort { + variable stacktrace_on 0 + + proc do_error {msg {then error}} { + #note we exit or error out even if debug selected - as every do_error call is meant to interrupt code processing at the site of call + #this is not just a 'logging' call even though it has log-like descriptors + lassign $then type code + if {$code eq ""} { + set code 1 + } + set type [string tolower $type] + set levels [list debug info notice warn error critical] + if {$type in [concat $levels exit]} { + puts stderr "|$type> $msg" + } else { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be one of '$levels' or 'exit '" + } + if {$::tcl_interactive} { + #may not always be desirable - but assumed to be more useful not to exit despite request, to aid in debugging + if {[string tolower $type] eq "exit"} { + puts stderr " (exit suppressed due to tcl_interactive - raising error instead)" + if {![string is digit -strict $code]} { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be: 'exit '" + } + } + return -code error $msg + } else { + if {$type ne "exit"} { + return -code error $msg + } else { + if {[string is digit -strict $code]} { + exit $code + } else { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be 'error' or 'exit '" + return -code error $msg + } + } + } + } + + + + + + + variable debug 0 + variable testlist + set testlist { + 00.test-firstposition.txt + 0001.blah.txt + 1.test-sorts-after-all-leadingzero-number-one-equivs.txt + 1010.thousand-and-ten.second.txt + 01010.thousand-and-ten.first.txt + 0001.aaa.txt + 001.zzz.txt + 08.octal.txt-last-octal + 008.another-octal-first-octal.txt + 08.again-second-octal.txt + 001.a.txt + 0010.reconfig.txt + 010.etc.txt + 005.etc.01.txt + 005.Etc.02.txt + 005.123.abc.txt + 200.somewhere.txt + 2zzzz.before-somewhere.txt + 00222-after-somewhere.txt + 005.00010.abc.txt + 005.a3423bc.00010.abc.txt + 005.001.abc.txt + 005.etc.1010.txt + 005.etc.010.txt + 005.etc.10.txt + " 005.etc.10.txt" + 005.etc.001.txt + 20.somewhere.txt + 4611686018427387904999999999-bignum.txt + 4611686018427387903-bigishnum.txt + 9223372036854775807-bigint.txt + etca-a + etc-a + etc2-a + a0001blah.txt + a010.txt + winlike-sort-difference-0.1.txt + winlike-sort-difference-0.1.1.txt + a1.txt + b1-a0001blah.txt + b1-a010.txt + b1-a1.txt + -a1.txt + --a1.txt + --a10.txt + 2.high-two.yml + 02.higher-two.yml + reconfig.txt + _common.stuff.txt + CASETEST.txt + casetest.txt + something.txt + some~thing.txt + someathing.txt + someThing.txt + thing.txt + thing_revised.txt + thing-revised.txt + "thing revised.txt" + "spacetest.txt" + " spacetest.txt" + " spacetest.txt" + "spacetest2.txt" + "spacetest 2.txt" + "spacetest02.txt" + name.txt + name2.txt + "name .txt" + "name2 .txt" + blah.txt + combined.txt + a001.txt + .test + .ssh + "Feb 10.txt" + "Feb 8.txt" + 1ab23v23v3r89ad8a8a8a9d.txt + "Folder (10)/file.tar.gz" + "Folder/file.tar.gz" + "Folder (1)/file (1).tar.gz" + "Folder (1)/file.tar.gz" + "Folder (01)/file.tar.gz" + "Folder1/file.tar.gz" + "Folder(1)/file.tar.gz" + + } + lappend testlist "Some file.txt" + lappend testlist " Some extra file1.txt" + lappend testlist " Some extra file01.txt" + lappend testlist " some extra file1.txt" + lappend testlist " Some extra file003.txt" + lappend testlist " Some file.txt" + lappend testlist "Some extra file02.txt" + lappend testlist "Program Files (x86)" + lappend testlist "01999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999991-bigger-pathologically-bignum.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt" + lappend testlist "29999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-smaller-pathologically-bignum.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 1.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 01.txt" + lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" + lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a01a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" + lappend testlist "b1b1b1b1.txt" + lappend testlist "b1b01z1z1.txt" + lappend testlist "c1c111c1.txt" + lappend testlist "c1c1c1c1.txt" + + namespace eval overtype { + proc right {args} { + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? undertext overtext} + } + foreach {undertext overtext} [lrange $args end-1 end] break + + set opt(-overflow) 0 + array set opt [lrange $args 0 end-2] + + + set olen [string length $overtext] + set ulen [string length $undertext] + + if {$opt(-overflow)} { + return [string range $undertext 0 end-$olen]$overtext + } else { + if {$olen > $ulen} { + set diff [expr {$olen - $ulen}] + return [string range $undertext 0 end-$olen][string range $overtext 0 end-$diff] + } else { + return [string range $undertext 0 end-$olen]$overtext + } + } + } + proc left {args} { + # @c overtype starting at left (overstrike) + # @c can/should we use something like this?: 'format "%-*s" $len $overtext + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} + } + foreach {undertext overtext} [lrange $args end-1 end] break + + set opt(-ellipsis) 0 + set opt(-ellipsistext) {...} + set opt(-overflow) 0 + array set opt [lrange $args 0 end-2] + + + set len [string length $undertext] + set overlen [string length $overtext] + set diff [expr {$overlen - $len}] + + #puts stdout "====================>overtype: datalen:$len overlen:$overlen diff:$diff" + #puts stdout "====================>overtype: data: $overtext" + if {$diff > 0} { + if {$opt(-overflow)} { + return $overtext + } else { + if {$opt(-ellipsis)} { + return [overtype::left [string range $overtext 0 [expr {$len -1}]] $opt(-ellipsistext)] + } else { + return [string range $overtext 0 [expr {$len -1}]] + } + } + } else { + return "$overtext[string range $undertext $overlen end]" + } + } + + } + + #considered using hex to make large numbers more compact for viewing in debug output - but it's not that much shorter and probably obscures more than it helps. + proc hex2dec {largeHex} { + #todo - use punk::lib::hex2dec - (scan supports ll so can do larger hex values directly) + set res 0 + set largeHex [string map [list _ ""] $largeHex] + if {[string length $largeHex] <=7} { + #scan can process up to FFFFFFF and does so quickly + return [scan $largeHex %x] + } + foreach hexDigit [split $largeHex {}] { + set new 0x$hexDigit + set res [expr {16*$res + $new}] + } + return $res + } + proc dec2hex {decimalNumber} { + format %4.4llX $decimalNumber + } + + #punk::lib::trimzero + proc trimzero {number} { + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + #todo - consider human numeric split + #e.g consider SI suffixes k|KMGTPEZY in that order + + #in this context, for natural sorting - numeric segments don't contain underscores or other punctuation such as . - + etc. + #review - what about unicode equivalents such as wide numerals \UFF10 to \UFF19? unicode normalization? + proc split_numeric_segments {name} { + set segments [list] + while {[string length $name]} { + if {[scan $name {%[0-9]%n} chunk len] == 2} { + lappend segments $chunk + set name [string range $name $len end] + } + if {[scan $name {%[^0-9]%n} chunk len] == 2} { + lappend segments $chunk + set name [string range $name $len end] + } + } + return $segments + } + + proc padleft {str count {ch " "}} { + set val [string repeat $ch $count] + append val $str + set diff [expr {max(0,$count - [string length $str])}] + set offset [expr {max(0,$count - $diff)}] + set val [string range $val $offset end] + } + + + # Sqlite may have limited collation sequences available in default builds. + # with custom builds - there may be others such as 'natsort' - see https://sqlite.org/forum/forumpost/e4dc6f3331 + # This is of limited use with the few builtin collations available in 2023 ie binary,nocase & rtrim + # but may provide a quicker,flexible sort option, especially if/when more collation sequences are added to sqlite + # There are also prebuilt packages such as sqlite3-icu which allows things like "SELECT icu_load_collation('en_AU', 'australian');" + proc sort_sqlite {stringlist args} { + package require sqlite3 + + + set args [check_flags -caller natsort_sqlite -defaults [list -db :memory: -collate nocase -winlike 0 -topchars "\uFFFF" -debug 0 -splitchars [list / . - _] -extras {all}] -values $args] + set db [string trim [dict get $args -db]] + set collate [string trim [dict get $args -collate]] + set debug [string trim [dict get $args -debug]] + set topchars [string trim [dict get $args -topchars]] + + set topdot [expr {"." in $topchars}] + set topunderscore [expr {"_" in $topchars}] + + + sqlite3 db_sort_basic $db + set orderedlist [list] + db_sort_basic eval [string map [list %collate% $collate] {create table sqlitesort(index0 text COLLATE %collate%, name text COLLATE %collate%)}] + foreach nm $stringlist { + set segments [split_numeric_segments $nm] + set index "" + set s 0 + foreach seg $segments { + if {($s == 0) && ![string length [string trim $seg]]} { + #don't index leading space + } elseif {($s == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { + append index "[padleft "0" 5]-d -100 topunderscore " + append index [string trim $seg] + } elseif {($s == 0) && ($topdot) && [string match .* [string trim $seg]]} { + append index "[padleft "0" 5]-d -50 topdot " + append index [string trim $seg] + } else { + if {[string is digit [string trim $seg]]} { + set basenum [trimzero [string trim $seg]] + set lengthindex "[padleft [string length $basenum] 5]-d" + append index "$lengthindex " + #append index [padleft $basenum 40] + append index $basenum + } else { + append index [string trim $seg] + } + } + incr s + } + puts stdout ">>$index" + db_sort_basic eval {insert into sqlitesort values($index,$nm)} + } + db_sort_basic eval [string map [list %collate% $collate] {select name from sqlitesort order by index0 COLLATE %collate% ASC, name COLLATE %collate% ASC }] { + lappend orderedlist $name + } + db_sort_basic close + return $orderedlist + } + + proc get_leading_char_count {str char} { + #todo - something more elegant? regex? + set count 0 + foreach c [split $str "" ] { + if {$c eq $char} { + incr count + } else { + break + } + } + return $count + } + proc stacktrace {} { + set stack "Stack trace:\n" + for {set i 1} {$i < [info level]} {incr i} { + set lvl [info level -$i] + set pname [lindex $lvl 0] + append stack [string repeat " " $i]$pname + + if {![catch {info args $pname} pargs]} { + foreach value [lrange $lvl 1 end] arg $pargs { + + if {$value eq ""} { + if {$arg != 0} { + info default $pname $arg value + } + } + append stack " $arg='$value'" + } + } else { + append stack " !unknown vars for $pname" + } + + append stack \n + } + return $stack + } + + proc get_char_count {str char} { + #faster than lsearch on split for str of a few K + expr {[string length $str]-[string length [string map [list $char {}] $str]]} + } + + proc build_key {chunk splitchars topdict tagconfig debug} { + variable stacktrace_on + if {$stacktrace_on} { + puts stderr "+++>[stacktrace]" + } + + set index_map [list - "" _ ""] + #e.g - need to maintain the order + #a b.txt + #a book.txt + #ab.txt + #abacus.txt + + + set original_splitchars [dict get $tagconfig original_splitchars] + + # tag_dashes test moved from loop - review + set tag_dashes 0 + if {![string length [dict get $tagconfig last_part_text_tag]]} { + #winlike + set tag_dashes 1 + } + if {("-" ni $original_splitchars)} { + set tag_dashes 1 + } + if {$debug >= 3} { + puts stdout "START build_key chunk : $chunk" + puts stdout "START build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + } + + + ## index_map will have no effect if we've already split on the char anyway(?) + #foreach m [dict keys $index_map] { + # if {$m in $original_splitchars} { + # dict unset index_map $m + # } + #} + + #if {![string length $chunk]} return + + set result "" + if {![llength $splitchars]} { + #no more structural splits - but we need to examine numeric/non-numeric segments at the lowest level. + # we are at a leaf in the recursive split hierarchy + + set s "" ;#we never actually split on "" (unless that was put in splitchars.. but it probably shouldn't be) + set parts [list $chunk] ;#important to treat as list or leading/trailing whitespace lost + + + } else { + set s [lindex $splitchars 0] + if {"spudbucket$s" in "[split $chunk {}]"} { + error "dead-branch spudbucket" + set partindex [build_key $chunk [lrange $splitchars 1 end] $topdict $tagconfig $debug] + if {[dict get $tagconfig showsplits]} { + set pfx "(1${s}=)" ;# = sorts before _ + set partindex ${pfx}$partindex + } + + return $partindex + } else { + set parts_below_index "" + + if {$s ni [split $chunk ""]} { + #$s can be an empty string + set parts [list $chunk] + } else { + set parts [split $chunk $s] ;#whitespace preserved - even if splitting on s that is not in string. + } + #assert - we have a splitchar $s that is in the chunk - so at least one part + if {(![string length $s] || [llength $parts] == 0)} { + error "buld_key assertion false empty split char and/or no parts" + } + + set pnum 1 ;# 1 based for clarity of reading index in debug output + set subpart_count [llength $parts] + + set sub_splits [lrange $splitchars 1 end] ;#pass same splitchars to each subpart + foreach p $parts { + set partindex [build_key $p $sub_splits $topdict $tagconfig $debug] + set lastpart [expr {$pnum == $subpart_count}] + + + ####################### + set showsplits [dict get $tagconfig showsplits] + #split prefixing experiment - maybe not suitable for general use - as it affects sort order + #note that pfx must be consistent until last one, no matter how many partnumbers there are in total. + # we don't want to influence sort order before reaching end. + #e.g for: + #(1.=)... + #(1._)...(2._)...(3.=) + #(1._)...(2.=) + #Note that this is probably more suitable for highly structure dependant sorts where the results are maybe less.. natural. + if {$showsplits} { + if {$lastpart} { + set pfx "(${pnum}${s}_" + #set pfx "(${pnum}${s}=)" ;# = sorts before _ + } else { + set pfx "(${pnum}${s}_" + } + append parts_below_index $pfx + } + ####################### + + if {$lastpart} { + if {[string length $p] && [string is digit $p]} { + set last_part_tag "<22${s}>" + } else { + set last_part_tag "<33${s}>" + } + + set last_part_text_tag [dict get $tagconfig last_part_text_tag] + #for -winlike 1 there is no tag configured. Windows explorer likes to put things in the order: + # module-0.1.1.tm + # module-0.1.1.2.tm + # module-0.1.tm + # arguably -winlike 0 is more natural/human + # module-0.1.tm + # module-0.1.1.tm + # module-0.1.1.2.tm + + if {[string length $last_part_text_tag]} { + #replace only the first text-tag (<30>) from the subpart_index + if {[string match "<30?>*" $partindex]} { + #give textual string index a specific tag for last part in split only. e.g <130> for lower than integers + set partindex "<130>[string range $partindex 5 end]" + } + #append parts_below_index $last_part_tag + } + #set partindex $last_part_tag$partindex + + + } + append parts_below_index $partindex + + + + if {$showsplits} { + if {$lastpart} { + set suffix "${pnum}${s}=)" ;# = sorts before _ + } else { + set suffix "${pnum}${s}_)" + } + append parts_below_index $suffix + } + + + incr pnum + } + append parts_below_index "" ;# don't add anything at the tail that may perturb sort order + + if {$debug >= 3} { + set pad [string repeat " " 20] + puts stdout "END build_key chunk : $chunk " + puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + puts stdout "END build_key ret below_index: $parts_below_index" + } + return $parts_below_index + + + } + } + + + + #puts stdout ">>>chunk:'$chunk'<<< split-on:$s parts: '$parts' splitchars: $splitchars -topdict:$topdict" + + + + + + #if {$chunk eq ""} { + # puts "___________________________________________!!!____" + #} + #puts stdout "-->chunk:$chunk $s parts:$parts" + + #puts stdout "---chunk:'$chunk' part:'$part' parts:'$parts' s:'$s'" + + + + + set segments [split_numeric_segments $chunk] ;#! + set stringindex "" + set segnum 0 + foreach seg $segments { + #puts stdout "=================---->seg:$seg segments:$segments" + #-strict ? + if {[string length $seg] && [string is digit $seg]} { + set basenum [trimzero [string trim $seg]] + set lengthindex "[padleft [string length $basenum] 4]d" + #append stringindex "<20>$lengthindex $basenum $seg" + } else { + set c1 [string range $seg 0 0] + #puts stdout "==============> c1'$c1' topdict: $topdict stringindex:$stringindex" + + if {$c1 in [dict keys $topdict]} { + set tag [dict get $topdict $c1] + #append stringindex "${tag}$c1" + #set seg [string range $seg 1 end] + } + #textindex + set leader "<30>" + set idx $seg + set idx [string trim $idx] + set idx [string tolower $idx] + set idx [string map $index_map $idx] + + + + + + #set the X-c count to match the length of the index - not the raw data + set lengthindex "[padleft [string length $idx] 4]c" + + #append stringindex "${leader}$idx $lengthindex $texttail" + } + } + + if {[llength $parts] != 1} { + error "build_key assertion fail llength parts != 1 parts:$parts" + } + + set segtail_clearance_buffer " " ;#space to clear other split indicators if using showsplits + set segtail $segtail_clearance_buffer + append segtail "\[" + set grouping "" + set pnum 0 + foreach p $parts { + set sublen_list [list] + set subsegments [split_numeric_segments $p] + set i 0 + + set partsorter "" + foreach sub $subsegments { + ##don't trim spaces here - it would be inconsistent. Some subs are pure whitespace - others have internal whitespace. e.g "a4 400b a b2" becomes "a 4 { } 400 {b a b} 2" + #mapping away all whitespace would be consistent, but not necessarily desirable. If it's in the index_map it'll happen anyway - so we don't do it manually here except for evaluating conditions. + set test_trim [string trim $sub] + set str $sub + set str [string tolower $str] + set str [string map $index_map $str] + if {[string length $test_trim] && [string is digit $test_trim]} { + append partsorter [trimzero $str] + } else { + append partsorter "$str" + } + append partsorter + } + + + foreach sub $subsegments { + + if {[string length $sub] && [string is digit $sub]} { + set basenum [trimzero [string trim $sub]] + set subequivs $basenum + set lengthindex "[padleft [string length $subequivs] 4]d " + set idx "$lengthindex [padleft $basenum 10]" ;#todo - cycle through data and determine longest + set tail [overtype::left [string repeat " " 10] $sub] + #set tail "" + } else { + set idx "" + + + set lookahead [lindex $subsegments $i+1] + if {![string length $lookahead]} { + set zeronum "[padleft 0 4]d0" + } else { + set zeronum "" + } + set subequivs $sub + #set subequivs [string trim $subequivs] + set subequivs [string tolower $subequivs] + set subequivs [string map $index_map $subequivs] + + append idx $subequivs + append idx $zeronum + + set idx $subequivs + + + # + + set ch "-" + if {$tag_dashes} { + #puts stdout "____TAG DASHES" + #winlike + set numleading [get_leading_char_count $seg $ch] + if {$numleading > 0} { + set texttail "<31-leading[padleft $numleading 4]$ch>" + } else { + set texttail "<30>" + } + set numothers [expr {[get_char_count $seg $ch] - $numleading}] + if {$debug >= 2} { + puts stdout "____dashcount: [get_char_count $seg $ch] numothers: $numothers" + } + if {$numothers > 0} { + append texttail "<31-others[padleft $numothers 4]$ch>" + } else { + append textail "<30>" + } + } else { + set texttail "<30>" + } + + + + + #set idx $partsorter + set tail "" + #set tail [string tolower $sub] ;#raw + #set tail $partsorter + #append tail ":[string tolower $p]" ;#we need the whole part - even though it makes the index much larger. !todo - tagconfig switch to toggle case sensitive sorting + } + + append grouping "$idx $tail|$s" + incr i + } + + + + + + if {$p eq ""} { + # no subsegments.. + set zeronum "[padleft 0 4]d0" + #append grouping "\u000$zerotail" + append grouping ".$zeronum" + } + + #append grouping | + #append grouping $s + #foreach len $sublen_list { + # append segtail "<[padleft $len 3]>" + #} + incr pnum + } + set grouping [string trimright $grouping $s] + append grouping "[padleft [llength $parts] 4]" + append segtail $grouping + + + #append segtail " <[padleft [llength $parts] 4]>" + + append segtail "\]" + + + #if {[string length $seg] && [string is digit $seg]} { + # append segtail "<20>" + #} else { + # append segtail "<30>" + #} + append stringindex $segtail + + incr segnum + + + + + lappend indices $stringindex + + if {[llength $indices] > 1} { + puts stderr "INDICES [llength $indices]: $stringindex" + error "build_key assertion error deadconcept indices" + } + + #topchar handling on splitter characters + #set c1 [string range $chunk 0 0] + if {$s in [dict keys $topdict]} { + set tag [dict get $topdict $s] + set joiner [string map [list ">" "$s>"] ${tag}] + #we have split on this character $s so if the first part is empty string then $s was a leading character + # we need to bring a tag out front for this, or it will be dominated by the leading sections-remaing tag + # (since the empty string produces no tag of it's own - ?) + if {[string length [lindex $parts 0]] == 0} { + set prefix ${joiner} + } else { + set prefix "" + } + } else { + #use standard character-data positioning tag if no override from topdict + set joiner "<30J>$s" + set prefix "" + } + + + set contentindex $prefix[join $indices $joiner] + if {[string length $s]} { + set split_indicator "" + } else { + set split_indicator "" + + } + if {![string length $s]} { + set s ~ + } + + #return "[overtype::left [string repeat { } 80] $contentindex][overtype::left [string repeat { } 10] [list $s $chunk]]" + #return $contentindex$split_indicator + #return [overtype::left [string repeat - 40] $contentindex] + + if {$debug >= 3} { + puts stdout "END build_key chunk : $chunk" + puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + puts stdout "END build_key ret contentidx : $contentindex" + } + return $contentindex + } + + #---------------------------------------- + #line-processors - data always last argument - opts can be empty string + #all processor should accept empty opts and ignore opts if they don't use them + proc _lineinput_as_tcl1 {opts line} { + set out "" + foreach i $line { + append out "$i " + } + set out [string range $out 0 end-1] + return $out + } + #should be equivalent to above + proc _lineinput_as_tcl {opts line} { + return [concat {*}$line] + } + #will put extra tcl quoting if it was already tcl-shaped e.g text "a" -> {"a"} + proc _lineoutput_as_tcl {opts line} { + return [regexp -inline -all {\S+} $line] + } + + proc _lineinput_as_raw {opts line} { + return $line + } + proc _lineoutput_as_raw {opts line} { + return $line + } + + #words is opposite of tcl + proc _lineinput_as_words {opts line} { + #wordlike_parts + return [regexp -inline -all {\S+} $line] + } + proc _lineoutput_as_words {opts line} { + return [concat {*}$line] + } + + #opts same as tcllib csv::split - except without the 'line' element + #?-alternate? ?sepChar? ?delChar? + proc _lineinput_as_csv {opts line} { + package require csv + if {[lindex $opts 0] eq "-alternate"} { + return [csv::split -alternate $line {*}[lrange $opts 1 end]] + } else { + return [csv::split $line {*}$opts] + } + } + #opts same as tcllib csv::join + #?sepChar? ?delChar? ?delMode? + proc _lineoutput_as_csv {opts line} { + package require csv + return [csv::join $line {*}$opts] + } + #---------------------------------------- + proc sort {stringlist args} { + #puts stdout "natsort::sort args: $args" + variable debug + if {![llength $stringlist]} return + + #allow pass through of the check_flags flag -debugargs so it can be set by the caller + set debugargs 0 + if {[set posn [lsearch $args -debugargs]] >=0} { + if {$posn == [llength $args]-1} { + #-debugargs at tail of list + set debugargs 1 + } else { + set debugargs [lindex $args $posn+1] + } + } + + #-return flagged|defaults doesn't work Review. + #flagfilter global processor/allocator not working 2023-08 + set args [check_flags \ + -caller natsort::sort \ + -return supplied|defaults \ + -debugargs $debugargs \ + -defaults [list -collate nocase \ + -winlike 0 \ + -splits "\uFFFF" \ + -topchars {. _} \ + -showsplits 1 \ + -sortmethod ascii \ + -collate "\uFFFF" \ + -inputformat raw \ + -inputformatapply {index data} \ + -inputformatoptions "" \ + -outputformat raw \ + -outputformatoptions "" \ + -cols "\uFFFF" \ + -debug 0 -db "" -stacktrace 0 -splits "\uFFFF" -showsplits 0] \ + -required {all} \ + -extras {none} \ + -commandprocessors {} \ + -values $args] + + #csv unimplemented + + set winlike [dict get $args -winlike] + set topchars [dict get $args -topchars] + set cols [dict get $args -cols] + set debug [dict get $args -debug] + set stacktrace [dict get $args -stacktrace] + set showsplits [dict get $args -showsplits] + set splits [dict get $args -splits] + set sortmethod [dict get $args -sortmethod] + set opt_collate [dict get $args -collate] + set opt_inputformat [dict get $args -inputformat] + set opt_inputformatapply [dict get $args -inputformatapply] + set opt_inputformatoptions [dict get $args -inputformatoptions] + set opt_outputformat [dict get $args -outputformat] + set opt_outputformatoptions [dict get $args -outputformatoptions] + dict unset args -showsplits + dict unset args -splits + if {$debug} { + puts stdout "natsort::sort processed_args: $args" + if {$debug == 1} { + puts stdout "natsort::sort - try also -debug 2, -debug 3" + } + } + + #set sortmethod "-dictionary" ;# sorts a2b before a001b - possibly other strangenesses that are hard to reason about + + if {$sortmethod in [list dictionary ascii]} { + set sortmethod "-$sortmethod" + # -ascii is default for tcl lsort. + } else { + set sortmethod "-ascii" + } + + set allowed_collations [list nocase] + if {$opt_collate ne "\uFFFF"} { + if {$opt_collate ni $allowed_collations} { + error "natsort::sort unknown value for -collate option. Only acceptable value(s): $allowed_collations" + } + set nocaseopt "-$opt_collate" + } else { + set nocaseopt "" + } + set allowed_inputformats [list tcl raw csv words] + if {$opt_inputformat ni $allowed_inputformats} { + error "natsort::sort unknown value for -inputformat option. Only acceptable value(s): $allowed_inputformats" + } + set allowed_outputformats [list tcl raw csv words] + if {$opt_inputformat ni $allowed_outputformats} { + error "natsort::sort unknown value for -outputformat option. Only acceptable value(s): $allowed_outputformats" + } + + # + set winsplits [list / . _] + set commonsplits [list / . _ -] + #set commonsplits [list] + + set tagconfig [dict create] + dict set tagconfig last_part_text_tag "<19>" + if {$winlike} { + set splitchars $winsplits + #windows explorer sorts leading spaces at the top - which doesn't seem very helpful for keeping things together - but the explorer doesn't seem able to create leading spaces anyway. + set wintop [list "(" ")" { } {.} {_}] ;#windows specific order + foreach t $topchars { + if {$t ni $wintop} { + lappend wintop $t + } + } + set topchars $wintop + dict set tagconfig last_part_text_tag "" + } else { + set splitchars $commonsplits + } + if {$splits ne "\uFFFF"} { + set splitchars $splits + } + dict set tagconfig original_splitchars $splitchars + dict set tagconfig showsplits $showsplits + + #create topdict + set i 0 + set topdict [dict create] + foreach c $topchars { + incr i ;#start at 01 so that 00 reserved for final-split tag (allows x-0.1.txt to sort above x-0.1.1.txt by default. Use tagconfig to change, or choose -winlike 1 for explorer-like sorting) + dict set topdict $c "<0$i>" + } + set keylist [list] + + + if {$opt_inputformat eq "tcl"} { + set lineinput_transform [list _lineinput_as_tcl $opt_inputformatoptions] + } elseif {$opt_inputformat eq "csv"} { + set lineinput_transform [list _lineinput_as_csv $opt_inputformatoptions] + } elseif {$opt_inputformat eq "raw"} { + set lineinput_transform [list _lineinput_as_raw $opt_inputformatoptions] + } elseif {$opt_inputformat eq "words"} { + set lineinput_transform [list _lineinput_as_words $opt_inputformatoptions] + } + if {$opt_outputformat eq "tcl"} { + set lineoutput_transform [list _lineoutput_as_tcl $opt_outputformatoptions] + } elseif {$opt_outputformat eq "csv"} { + set lineoutput_transform [list _lineoutput_as_csv $opt_outputformatoptions] + } elseif {$opt_outputformat eq "raw"} { + set lineoutput_transform [list _lineoutput_as_raw $opt_outputformatoptions] + } elseif {$opt_outputformat eq "words"} { + set lineoutput_transform [list _lineoutput_as_words $opt_outputformatoptions] + } + + + if {("data" in $opt_inputformatapply) || ("index" in $opt_inputformatapply)} { + if {$opt_inputformat eq "raw"} { + set tf_stringlist $stringlist + } else { + set tf_stringlist [list] + foreach v $stringlist { + lappend tf_stringlist [{*}$lineinput_transform $v] + } + } + if {"data" in $opt_inputformatapply} { + set tf_data_stringlist $tf_stringlist + } else { + set tf_data_stringlist $stringlist + } + if {"index" in $opt_inputformatapply} { + set tf_index_stringlist $tf_stringlist + } else { + set tf_index_stringlist $stringlist + } + } else { + set tf_data_stringlist $stringlist + set tf_index_stringlist $stringlist + } + + + + if {$stacktrace} { + puts stdout [natsort::stacktrace] + set natsort::stacktrace_on 1 + } + if {$cols eq "\uFFFF"} { + set colkeys [lmap v $stringlist {}] + } else { + set colkeys [list] + foreach v $tf_index_stringlist { + set lineparts $v + set k [list] + foreach c $cols { + lappend k [lindex $lineparts $c] + } + lappend colkeys [join $k "_"] ;#use a common-split char - Review + } + } + #puts stdout "colkeys: $colkeys" + + if {$opt_inputformat eq "raw"} { + #no inputformat was applied - can just use stringlist + foreach value $stringlist ck $colkeys { + set contentindex [build_key $value $splitchars $topdict $tagconfig $debug] + set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] + lappend keylist ${colindex}-${contentindex}-$value ;#note: entire raw value used for final sort disambiguation (can be whitespace that was ignored in indexing) + } + } else { + foreach keyinput $tf_index_stringlist datavalue $tf_data_stringlist ck $colkeys { + #data may or may not have been transformed + #column index may or may not have been built with transformed data + + set contentindex [build_key $keyinput $splitchars $topdict $tagconfig $debug] + set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] + lappend keylist ${colindex}-${contentindex}-$datavalue ;#note: entire value used for final sort disambiguation (can be whitespace that was ignored in indexing) + } + } + #puts stderr "keylist: $keylist" + + ################################################################################################### + # Use the generated keylist to do the actual sorting + # select either the transformed or raw data as the corresponding output + ################################################################################################### + if {[string length $nocaseopt]} { + set sortcommand [list lsort $sortmethod $nocaseopt -indices $keylist] + } else { + set sortcommand [list lsort $sortmethod -indices $keylist] + } + if {$opt_outputformat eq "raw"} { + #raw output means no further transformations - it doesn't mean there wasn't a transform applied on the input side + #use the tf_data_stringlist in the output - which will be the same as the input stringlist if no input transform applied for data. + #(Also - it may or may not have been *sorted* on transformed data depending on whether 'index' was in $opt_inputformatapply) + foreach idx [{*}$sortcommand] { + lappend result [lindex $tf_data_stringlist $idx] + } + } else { + #we need to apply an output format + #The data may or may not have been transformed at input + foreach idx [{*}$sortcommand] { + lappend result [{*}$lineoutput_transform [lindex $tf_data_stringlist $idx]] + } + } + ################################################################################################### + + + + + + if {$debug >= 2} { + set screen_width 250 + set max_val 0 + set max_idx 0 + ##### calculate colum widths + foreach i [{*}$sortcommand] { + set len_val [string length [lindex $stringlist $i]] + if {$len_val > $max_val} { + set max_val $len_val + } + set len_idx [string length [lindex $keylist $i]] + if {$len_idx > $max_idx} { + set max_idx $len_idx + } + } + #### + set l_width [expr {$max_val + 1}] + set leftcol [string repeat " " $l_width] + set r_width [expr {$screen_width - $l_width - 1}] + set rightcol [string repeat " " $r_width] + set str [overtype::left $leftcol RAW] + puts stdout " $str Index with possibly transformed data at tail" + foreach i [{*}$sortcommand] { + #puts stdout "|d> [overtype::left $leftcol [lindex $stringlist $i] ] [lindex $keylist $i]" + set index [lindex $keylist $i] + set len_idx [string length $index] + set rowcount [expr {$len_idx / $r_width}] + if {($len_idx % $r_width) > 0} { + incr rowcount + } + set rows [list] + for {set r 0} {$r < $rowcount} {incr r} { + lappend rows [string range $index 0 $r_width-$r] + set index [string range $index $r_width end] + } + + set r 0 + foreach idxpart $rows { + if {$r == 0} { + #use the untransformed stringlist + set str [overtype::left $leftcol [lindex $stringlist $i]] + } else { + set str [overtype::left $leftcol ...]] + } + puts stdout " $str $idxpart" + incr r + } + #puts stdout "|> '[lindex $stringlist $i]'" + #puts stdout "|> [lindex $keylist $i]" + } + + puts stdout "|debug> topdict: $topdict" + puts stdout "|debug> splitchars: $splitchars" + } + return $result + } + + + + #Note that although leading whitespace isn't a commonly used feature of filesystem names - it's possible at least on FreeBSD,windows and linux so we should try to handle it sensibly. + proc sort_experiment {stringlist args} { + package require sqlite3 + + variable debug + set args [check_flags -caller natsort::sort \ + -defaults [dict create -db :memory: -collate nocase -nullvalue "->NULL<" -winlike 0 -topchars [list] -debug 0] \ + -extras {all} \ + -values $args] + set db [string trim [dict get $args -db]] + set collate [string trim [dict get $args -collate]] + set winlike [string trim [dict get $args -winlike]] + set debug [string trim [dict get $args -debug]] + set nullvalue [string trim [dict get $args -nullvalue]] + + + set topchars [string trim [dict get $args -topchars]] + + set topdot [expr {"." in $topchars}] + set topunderscore [expr {"_" in $topchars}] + + + sqlite3 db_natsort2 $db + #-- + #our table must handle the name with the greatest number of numeric/non-numeric splits. + #This means a single list member with pathological naming e.g a1a1a1a1a1a1a1a1a1a1a1.txt could greatly extend the number of columns and indices and affect performance. + #review: could be optimised to aggregate the tail into a single index, as the the extra columns won't assist in ordering, but make the table and query bigger. + # we should probably determine the longest common sequence of splits in the input list and add only one more index for the segment after that. + set maxsegments 0 + #-- + set prefix "idx" + + #note - there will be more columns in the sorting table than segments. + # (a segment equals one of the numeric & non-numeric string portions returned from 'split_numeric_sgements') + #--------------------------- + # consider + # a123b.v1.2.txt + # a123b.v1.3beta1.txt + # these have the following segments: + # a 123 b.v 1 . 2 .txt + # a 123 b.v 1 . 3 beta 1 .txt + #--------------------------- + # The first string has 7 segments (numbered 0 to 6) + # the second string has 9 segments + # + # for example when the data has any elements in a segment position that are numeric (e.g 0001 123) + # - then an index column with numeric equivalents will be created (e.g 0001 becomes 1), and any non-numeric values in that column will get mapped to a negative value (for special cases) or a high value such as NULL (with NULLS LAST sql support) + # + # when a segment + + #cycle through all strings - we cannot build tabledef as we go because the column type depends on whether all segments for a particular column are text vs int-equivalent. + array set segmentinfo {} + foreach nm $stringlist { + set segments [split_numeric_segments $nm] + if {![string length [string trim [lindex $segments 0]]]} { + if {[string is digit [string trim [lindex $segments 1]]]} { + #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) + set segments [lrange $segments 1 end] + } + } + + + set c 0 ;#start of index columns + if {[llength $segments] > $maxsegments} { + set maxsegments [llength $segments] + } + foreach seg $segments { + set seg [string trim $seg] + set column_exists [info exists segmentinfo($c,type)] + if {[string is digit $seg]} { + if {$column_exists} { + #override it (may currently be text or int) + set segmentinfo($c,type) "int" + } else { + #new column + set segmentinfo($c,name) ${prefix}$c + set segmentinfo($c,type) "int" + } + } else { + #text never overrides int + if {!$column_exists} { + set segmentinfo($c,name) ${prefix}$c + set segmentinfo($c,type) "text" + } + } + incr c + } + } + if {$debug} { + puts stdout "Largest number of num/non-num segments in data: $maxsegments" + #parray segmentinfo + } + + # + set tabledef "" + set ordered_column_names [list] + set ordered_segmentinfo_tags [lsort -dictionary [array names segmentinfo *]] + foreach k $ordered_segmentinfo_tags { + lassign [split $k ,] c tag + if {$tag eq "type"} { + set type [set segmentinfo($k)] + if {$type eq "int"} { + append tabledef "$segmentinfo($c,name) int," + } else { + append tabledef "$segmentinfo($c,name) text COLLATE $collate," + } + append tabledef "raw$c text COLLATE $collate," + lappend ordered_column_names $segmentinfo($c,name) + lappend ordered_column_names raw$c ;#additional index column not in segmentinfo + } + if {$tag eq "name"} { + #lappend ordered_column_names $segmentinfo($k) + } + } + append tabledef "name text" + + #puts stdout "tabledef:$tabledef" + + + db_natsort2 eval [string map [list %tabledef% $tabledef] {create table natsort(%tabledef%)}] + + + foreach nm $stringlist { + array unset intdata + array set intdata {} + array set rawdata {} + #init array and build sql values string + set sql_insert "insert into natsort values(" + for {set i 0} {$i < $maxsegments} {incr i} { + set intdata($i) "" + set rawdata($i) "" + append sql_insert "\$intdata($i),\$rawdata($i)," + } + append sql_insert "\$nm" ;#don't manipulate name value in any way - e.g must leave all whitespace as the sort must return exactly the same elements as in the original list. + append sql_insert ")" + + set segments [split_numeric_segments $nm] + if {![string length [string trim [lindex $segments 0]]]} { + if {[string is digit [string trim [lindex $segments 1]]]} { + #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) + set segments [lrange $segments 1 end] + } + } + set values "" + set c 0 + foreach seg $segments { + if {[set segmentinfo($c,type)] eq "int"} { + if {[string is digit [string trim $seg]]} { + set intdata($c) [trimzero [string trim $seg]] + } else { + catch {unset intdata($c)} ;#set NULL - sorts last + if {($c == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { + set intdata($c) -100 + } + if {($c == 0) && ($topdot) && [string match .* [string trim $seg]]} { + set intdata($c) -50 + } + } + set rawdata($c) [string trim $seg] + } else { + #pure text column + #set intdata($c) [string trim $seg] ;#ignore leading/trailing whitespace - we sort first on trimmed version, then refine with the sort on rawdata index + #catch {unset indata($c)} + set indata($c) [string trim $seg] + set rawdata($c) $seg + } + #set rawdata($c) [string trim $seg]# + #set rawdata($c) $seg + incr c + } + db_natsort2 eval $sql_insert + } + + set orderedlist [list] + + if {$debug} { + db_natsort2 eval {select * from pragma_table_info('natsort')} rowdata { + parray rowdata + } + } + set orderby "order by " + + foreach cname $ordered_column_names { + if {[string match "idx*" $cname]} { + append orderby "$cname ASC NULLS LAST," + } else { + append orderby "$cname ASC," + } + } + append orderby " name ASC" + #append orderby " NULLS LAST" ;#?? + + #e.g "order by idx0 ASC, raw0 ASC, idx1 ASC .... name ASC" + if {$debug} { + puts stdout "orderby clause: $orderby" + } + db_natsort2 eval [string map [list %orderby% $orderby] {select * from natsort %orderby%}] rowdata { + set line "- " + #parray rowdata + set columnnames $rowdata(*) + #puts stdout "columnnames: $columnnames" + #[lsort -dictionary [array names rowdata] + append line "$rowdata(name) \n" + foreach nm $columnnames { + if {$nm ne "name"} { + append line "$nm: $rowdata($nm) " + } + } + #puts stdout $line + #puts stdout "$rowdata(name)" + lappend orderedlist $rowdata(name) + } + + db_natsort2 close + return $orderedlist + } +} + + +#application section e.g this file might be linked from /usr/local/bin/natsort +namespace eval natsort { + namespace import ::flagfilter::check_flags + + proc called_directly_namematch {} { + global argv0 + #see https://wiki.tcl-lang.org/page/main+script + #trailing ... let's us resolve symlinks in last component of the path (could be something else like ___ but ... seems unlikely to collide with anything in the filesystem) + if {[info exists argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $argv0 ...]]] + } { + return 1 + } else { + #puts stdout "norm info script: [file dirname [file normalize [file join [info script] ...]]]" + #puts stdout "norm argv0 : [file dirname [file normalize [file join $argv0 ...]]]" + return 0 + } + } + #Review issues around comparing names vs using inodes (esp with respect to samba shares) + proc called_directly_inodematch {} { + global argv0 + if {[info exists argv0] + && [file exists [info script]] && [file exists $argv0]} { + file stat $argv0 argv0Info + file stat [info script] scriptInfo + expr {$argv0Info(dev) == $scriptInfo(dev) + && $argv0Info(ino) == $scriptInfo(ino)} + } else { + return 0 + } + } + + set is_namematch [called_directly_namematch] + set is_inodematch [called_directly_inodematch] + #### + #review - reliability of mechanisms to determine direct calls + # we don't want application being called when being used as a library, but we need it to run if called directly or from symlinks etc + #-- choose a policy and leave the others commented. + #set is_called_directly $is_namematch + #set is_called_directly $is_inodematch + set is_called_directly [expr {$is_namematch || $is_inodematch}] + #set is_called_directly [expr {$is_namematch && $is_inodematch}] + ### + + + #puts stdout "called_directly_name: [called_directly_namematch] called_directly_inode: [called_directly_inodematch]" + + + # + + + proc test_pass_fail_message {pass {additional ""}} { + variable test_fail_msg + variable test_pass_msg + if {$pass} { + puts stderr $test_pass_msg + } else { + puts stderr $test_fail_msg + } + puts stderr $additional + } + + variable test_fail_msg "XXXXXXXXXXXX FAIL XXXXXXXXXXXXX" + variable test_pass_msg "------------ PASS -------------" + proc test_sort_1 {args} { + package require struct::list + puts stderr "---$args" + set args [check_flags -caller natsort:test_sort_1 -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] -values $args] + + puts stderr "test_sort_1 got args: $args" + + set unsorted_input { + 2.2.2 + 2.2.2.2 + 1a.1.1 + 1a.2.1.1 + 1.12.1 + 1.2.1.1 + 1.02.1.1 + 1.002b.1.1 + 1.1.1.2 + 1.1.1.1 + } + set input { +1.1.1 +1.1.1.2 +1.002b.1.1 +1.02.1.1 +1.2.1.1 +1.12.1 +1a.1.1 +1a.2.1.1 +2.2.2 +2.2.2.2 + } + + set sorted [natsort::sort $input {*}$args] + set is_match [struct::list equal $input $sorted] + + set msg "windows-explorer order" + + test_pass_fail_message $is_match $msg + puts stdout [string repeat - 40] + puts stdout INPUT + puts stdout [string repeat - 40] + foreach item $input { + puts stdout $item + } + puts stdout [string repeat - 40] + puts stdout OUTPUT + puts stdout [string repeat - 40] + foreach item $sorted { + puts stdout $item + } + test_pass_fail_message $is_match $msg + return [expr {!$is_match}] + } + proc test_sort_showsplits {args} { + package require struct::list + + set args [check_flags -caller natsort:test_sort_1 \ + -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 1 ] \ + -extras {all} \ + -values $args] + + set input1 { + a-b.txt + a.b.c.txt + b.c-txt + } + + + set input2 { + a.b.c.txt + a-b.txt + b.c-text + } + + foreach {msg testlist } [list "custom-order" $input1 "windows-explorer (should work with -winlike 1)" $input2] { + set sorted [natsort::sort $testlist {*}$args] + set is_match [struct::list equal $testlist $sorted] + + test_pass_fail_message $is_match $msg + puts stderr "INPUT" + puts stderr "[string repeat - 40]" + foreach item $testlist { + puts stdout $item + } + puts stderr "[string repeat - 40]" + puts stderr "OUTPUT" + puts stderr "[string repeat - 40]" + foreach item $sorted { + puts stdout $item + } + + test_pass_fail_message $is_match $msg + } + + #return [expr {!$is_match}] + + } + + #tcl dispatch order - non flag items up front + #trailing flags are paired even if supplied as solo flags e.g -l becomes -l 1 + proc commandline_ls {args} { + set operands [list] + set posn 0 + foreach a $args { + if {![string match -* $a]} { + lappend operands $a + } else { + set flag1_posn $posn + break + } + incr posn + } + set args [lrange $args $flag1_posn end] + + + set debug 0 + set posn [lsearch $args -debug] + if {$posn > 0} { + if {[lindex $args $posn+1]} { + set debug [lindex $args $posn+1] + } + } + if {$debug} { + puts stderr "|debug>commandline_ls got $args" + } + + #if first operand not supplied - replace it with current working dir + if {[lindex $operands 0] eq "\uFFFF"} { + lset operands 0 [pwd] + } + + set targets [list] + foreach op $operands { + if {$op ne "\uFFFF"} { + set opchars [split [file tail $op] ""] + if {"?" in $opchars || "*" in $opchars} { + lappend targets $op + } else { + #actual file or dir + set targetitem $op + set targetitem [file normalize $op] + if {![file exists $targetitem]} { + if {$debug} { + puts stderr "|debug>commandline_ls Unable to access path '$targetitem'" + } + } + lappend targets $targetitem + if {$debug} { + puts stderr "|debug>commandline_ls listing for $targetitem" + } + } + } + } + set args [check_flags -caller commandline_ls \ + -return flagged|defaults \ + -debugargs 0 \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 -algorithm sort] \ + -required {all} \ + -extras {all} \ + -soloflags {-v -l} \ + -commandprocessors {} \ + -values $args ] + if {$debug} { + puts stderr "|debug>args: $args" + } + + + set algorithm [dict get $args -algorithm] + dict unset args -algorithm + + set allfolders [list] + set allfiles [list] + foreach item $targets { + if {[file exists $item]} { + if {[file type $item] eq "directory"} { + set dotfolders [glob -nocomplain -directory $item -type {d} -tail .*] + set folders [glob -nocomplain -directory $item -type {d} -tail *] + set allfolders [concat $allfolders $dotfolders $folders] + + set dotfiles [glob -nocomplain -directory $item -type {f} -tail .*] + set files [glob -nocomplain -directory $item -type {f} -tail *] + set allfiles [concat $allfiles $dotfiles $files] + } else { + #file (or link?) + set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] + set allfiles [concat $allfiles $files] + } + } else { + set folders [glob -nocomplain -directory $item -type {d} -tail [file tail $item]] + set allfolders [concat $allfolders $folders] + set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] + set allfiles [concat $allfiles $files] + } + } + + + set sorted_folders [natsort::sort $allfolders {*}$args] + set sorted_files [natsort::sort $allfiles {*}$args] + + foreach fold $sorted_folders { + puts stdout $fold + } + foreach file $sorted_files { + puts stdout $file + } + + return "-- ok printed to stdout [llength $sorted_folders] folders and [llength $sorted_files] files --" + } + + package require argp + argp::registerArgs commandline_test { + { -showsplits boolean 0} + { -stacktrace boolean 0} + { -debug boolean 0} + { -winlike boolean 0} + { -db string ":memory:"} + { -collate string "nocase"} + { -algorithm string "sort"} + { -topchars string "\uFFFF"} + { -testlist string {10 1 30 3}} + } + argp::setArgsNeeded commandline_test {-stacktrace} + proc commandline_test {test args} { + variable testlist + puts stdout "commandline_test got $args" + argp::parseArgs opts + puts stdout "commandline_test got [array get opts]" + set args [check_flags -caller natsort_commandline \ + -return flagged|defaults \ + -defaults [list -db :memory: -collate nocase -testlist $testlist -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + -values $args] + + if {[string tolower $test] in [list "1" "true"]} { + set test "sort" + } else { + if {![llength [info commands $test]]} { + error "test $test not found" + } + } + dict unset args -test + set stacktrace [dict get $args -stacktrace] + # dict unset args -stacktrace + + set argtestlist [dict get $args -testlist] + dict unset args -testlist + + + set debug [dict get $args -debug] + + set collate [dict get $args -collate] + set db [dict get $args -db] + set winlike [dict get $args -winlike] + set topchars [dict get $args -topchars] + + + puts stderr "|test>-----start natsort::$test--- input list size : [llength $argtestlist]" + #set resultlist [$test $argtestlist -db $db -collate $collate -topchars $topchars -winlike $winlike] + set resultlist [$test $argtestlist {*}$args] + foreach nm $resultlist { + puts stdout $nm + } + puts stdout "|test>-----end natsort::$test--- sorted list size: [llength $resultlist]" + return "test end" + } + proc commandline_runtests {runtests args} { + set argvals [check_flags -caller commandline_runtests \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits "\uFFFF" -runtests 1] \ + -values $args] + + puts stderr "runtests args: $argvals" + + #set runtests [dict get $argvals -runtests] + dict unset argvals -runtests + dict unset argvals -algorithm + + puts stderr "runtests args: $argvals" + #exit 0 + + set test_prefix "::natsort::test_sort_" + + if {$runtests eq "1"} { + set runtests "*" + } + + + set testcommands [info commands ${test_prefix}${runtests}] + if {![llength $testcommands]} { + puts stderr "No test commands matched -runtests argument '$runtests'" + puts stderr "Use 1 to run all tests" + set alltests [info commands ${test_prefix}*] + puts stderr "Valid tests are:" + + set prefixlen [string length $test_prefix] + foreach t $alltests { + set shortname [string range $t $prefixlen end] + puts stderr "$t = -runtests $shortname" + } + + } else { + foreach cmd $testcommands { + puts stderr [string repeat - 40] + puts stderr "calling $cmd with args: '$argvals'" + puts stderr [string repeat - 40] + $cmd {*}$argvals + } + } + exit 0 + } + proc help {args} { + puts stdout "natsort::help got '$args'" + return "Help not implemented" + } + proc natsort_pipe {args} { + #PIPELINE to take input list on stdin and write sorted list to stdout + #strip - from arglist + #set args [check_flags -caller natsort_pipeline \ + # -return all \ + # -defaults [list -db :memory: -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + # -values $args] + + + set debug [dict get $args -debug] + if {$debug} { + puts stderr "|debug> natsort_pipe got args:'$args'" + } + set algorithm [dict get $args -algorithm] + dict unset args -algorithm + + set proclist [info commands ::natsort::sort*] + set algos [list] + foreach p $proclist { + lappend algos [namespace tail $p] + } + if {$algorithm ni [list {*}$proclist {*}$algos]} { + do_error "valid sort mechanisms: $algos" 2 + } + + + set input_list [list] + while {![eof stdin]} { + if {[gets stdin line] > 0} { + lappend input_list $line + } else { + if {[eof stdin]} { + + } else { + after 10 + } + } + } + + if {$debug} { + puts stderr "|debug> received [llength $input_list] list elements" + } + + set resultlist [$algorithm $input_list {*}$args] + if {$debug} { + puts stderr "|debug> returning [llength $resultlist] list elements" + } + foreach r $resultlist { + puts stdout $r + } + #exit 0 + + } + if {($is_called_directly)} { + set cmdprocessors { + {helpfinal {match "^help$" dispatch natsort::help}} + {helpfinal {sub -topic default "NONE"}} + } + #set args [check_flags \ + # -caller test1 \ + # -debugargs 2 \ + # -return arglist \ + # -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + # -required {none} \ + # -extras {all} \ + # -commandprocessors $cmdprocessors \ + # -values $::argv ] + interp alias {} do_filter {} ::flagfilter::check_flags + + #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld + set cmdprocessors { + {helpcmd {match "^help$" dispatch natsort::help singleopts {-v}}} + {helpcmd {sub -operand default \uFFFF singleopts {-l}}} + {lscmd {match "^ls$" dispatch natsort::commandline_ls dispatchtype tcl dispatchglobal 1 mashopts {-l -a} singleopts {-l -a} pairopts {} longopts {--color=always}}} + {lscmd {sub dir default "\uFFFF"}} + {lscmd {sub dir2 default "\uFFFF"}} + {lscmd {sub dir3 default "\uFFFF"}} + {lscmd {sub dir4 default "\uFFFF"}} + {lscmd {sub dir5 default "\uFFFF"}} + {lscmd {sub dir6 default "\uFFFF"}} + {runtests {match "^-tests$" dispatch natsort::commandline_runtests singleopts {-l}}} + {runtests {sub testname default "1" singleopts {-l}}} + {pipecmd {match "^-$" dispatch natsort::natsort_pipe dispatchtype tcl}} + } + set arglist [do_filter \ + -debugargs 0 \ + -debugargsonerror 2 \ + -caller cline_dispatch1 \ + -return all \ + -soloflags {-v -x} \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ + -required {all} \ + -extras {all} \ + -commandprocessors $cmdprocessors \ + -values $::argv ] + + + #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld + set cmdprocessors { + {testcmd {match "^test$" dispatch natsort::commandline_test singleopts {-l}}} + {testcmd {sub testname default "1" singleopts {-l}}} + } + set arglist [check_flags \ + -debugargs 0 \ + -caller cline_dispatch2 \ + -return all \ + -soloflags {-v -l} \ + -defaults [list -collate nocase -algorithm sort -testlist "1 2 3 10" -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ + -required {all} \ + -extras {all} \ + -commandprocessors $cmdprocessors \ + -values $::argv ] + + + + + #set cmdprocessors [list] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors $cmdprocessors -values $::argv ] + + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {-cmd {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {{-cmd -default help} {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {ls {ls lsdir -default "\uFFFF"}} -values $::argv ] + + exit 0 + + if {$::argc} { + + } + } +} + + +package provide natsort [namespace eval natsort { + variable version + set version 0.1.1.5 +}] + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.6.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.6.tm new file mode 100644 index 00000000..1d91b53f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/natsort-0.1.1.6.tm @@ -0,0 +1,1938 @@ +#! /usr/bin/env tclsh + + +package require flagfilter +namespace import ::flagfilter::check_flags + +namespace eval natsort { + proc scriptdir {} { + set possibly_linked_script [file dirname [file normalize [file join [info script] ...]]] + if {[file isdirectory $possibly_linked_script]} { + return $possibly_linked_script + } else { + return [file dirname $possibly_linked_script] + } + } + if {![interp issafe]} { + tcl::tm::add [scriptdir] + } +} + + +namespace eval natsort { + variable stacktrace_on 0 + + proc do_error {msg {then error}} { + #note we exit or error out even if debug selected - as every do_error call is meant to interrupt code processing at the site of call + #this is not just a 'logging' call even though it has log-like descriptors + lassign $then type code + if {$code eq ""} { + set code 1 + } + set type [string tolower $type] + set levels [list debug info notice warn error critical] + if {$type in [concat $levels exit]} { + puts stderr "|$type> $msg" + } else { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be one of '$levels' or 'exit '" + } + if {$::tcl_interactive} { + #may not always be desirable - but assumed to be more useful not to exit despite request, to aid in debugging + if {[string tolower $type] eq "exit"} { + puts stderr " (exit suppressed due to tcl_interactive - raising error instead)" + if {![string is digit -strict $code]} { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be: 'exit '" + } + } + return -code error $msg + } else { + if {$type ne "exit"} { + return -code error $msg + } else { + if {[string is digit -strict $code]} { + exit $code + } else { + puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be 'error' or 'exit '" + return -code error $msg + } + } + } + } + + + + + + + variable debug 0 + variable testlist + set testlist { + 00.test-firstposition.txt + 0001.blah.txt + 1.test-sorts-after-all-leadingzero-number-one-equivs.txt + 1010.thousand-and-ten.second.txt + 01010.thousand-and-ten.first.txt + 0001.aaa.txt + 001.zzz.txt + 08.octal.txt-last-octal + 008.another-octal-first-octal.txt + 08.again-second-octal.txt + 001.a.txt + 0010.reconfig.txt + 010.etc.txt + 005.etc.01.txt + 005.Etc.02.txt + 005.123.abc.txt + 200.somewhere.txt + 2zzzz.before-somewhere.txt + 00222-after-somewhere.txt + 005.00010.abc.txt + 005.a3423bc.00010.abc.txt + 005.001.abc.txt + 005.etc.1010.txt + 005.etc.010.txt + 005.etc.10.txt + " 005.etc.10.txt" + 005.etc.001.txt + 20.somewhere.txt + 4611686018427387904999999999-bignum.txt + 4611686018427387903-bigishnum.txt + 9223372036854775807-bigint.txt + etca-a + etc-a + etc2-a + a0001blah.txt + a010.txt + winlike-sort-difference-0.1.txt + winlike-sort-difference-0.1.1.txt + a1.txt + b1-a0001blah.txt + b1-a010.txt + b1-a1.txt + -a1.txt + --a1.txt + --a10.txt + 2.high-two.yml + 02.higher-two.yml + reconfig.txt + _common.stuff.txt + CASETEST.txt + casetest.txt + something.txt + some~thing.txt + someathing.txt + someThing.txt + thing.txt + thing_revised.txt + thing-revised.txt + "thing revised.txt" + "spacetest.txt" + " spacetest.txt" + " spacetest.txt" + "spacetest2.txt" + "spacetest 2.txt" + "spacetest02.txt" + name.txt + name2.txt + "name .txt" + "name2 .txt" + blah.txt + combined.txt + a001.txt + .test + .ssh + "Feb 10.txt" + "Feb 8.txt" + 1ab23v23v3r89ad8a8a8a9d.txt + "Folder (10)/file.tar.gz" + "Folder/file.tar.gz" + "Folder (1)/file (1).tar.gz" + "Folder (1)/file.tar.gz" + "Folder (01)/file.tar.gz" + "Folder1/file.tar.gz" + "Folder(1)/file.tar.gz" + + } + lappend testlist "Some file.txt" + lappend testlist " Some extra file1.txt" + lappend testlist " Some extra file01.txt" + lappend testlist " some extra file1.txt" + lappend testlist " Some extra file003.txt" + lappend testlist " Some file.txt" + lappend testlist "Some extra file02.txt" + lappend testlist "Program Files (x86)" + lappend testlist "01999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999991-bigger-pathologically-bignum.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt" + lappend testlist "29999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-smaller-pathologically-bignum.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 1.txt" + lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 01.txt" + lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" + lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a01a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" + lappend testlist "b1b1b1b1.txt" + lappend testlist "b1b01z1z1.txt" + lappend testlist "c1c111c1.txt" + lappend testlist "c1c1c1c1.txt" + + namespace eval overtype { + proc right {args} { + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? undertext overtext} + } + foreach {undertext overtext} [lrange $args end-1 end] break + + set opt(-overflow) 0 + array set opt [lrange $args 0 end-2] + + + set olen [string length $overtext] + set ulen [string length $undertext] + + if {$opt(-overflow)} { + return [string range $undertext 0 end-$olen]$overtext + } else { + if {$olen > $ulen} { + set diff [expr {$olen - $ulen}] + return [string range $undertext 0 end-$olen][string range $overtext 0 end-$diff] + } else { + return [string range $undertext 0 end-$olen]$overtext + } + } + } + proc left {args} { + # @c overtype starting at left (overstrike) + # @c can/should we use something like this?: 'format "%-*s" $len $overtext + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} + } + foreach {undertext overtext} [lrange $args end-1 end] break + + set opt(-ellipsis) 0 + set opt(-ellipsistext) {...} + set opt(-overflow) 0 + array set opt [lrange $args 0 end-2] + + + set len [string length $undertext] + set overlen [string length $overtext] + set diff [expr {$overlen - $len}] + + #puts stdout "====================>overtype: datalen:$len overlen:$overlen diff:$diff" + #puts stdout "====================>overtype: data: $overtext" + if {$diff > 0} { + if {$opt(-overflow)} { + return $overtext + } else { + if {$opt(-ellipsis)} { + return [overtype::left [string range $overtext 0 [expr {$len -1}]] $opt(-ellipsistext)] + } else { + return [string range $overtext 0 [expr {$len -1}]] + } + } + } else { + return "$overtext[string range $undertext $overlen end]" + } + } + + } + + #considered using hex to make large numbers more compact for viewing in debug output - but it's not that much shorter and probably obscures more than it helps. + proc hex2dec {largeHex} { + #todo - use punk::lib::hex2dec - (scan supports ll so can do larger hex values directly) + set res 0 + set largeHex [string map {_ {}} $largeHex] + if {[string length $largeHex] <=7} { + #scan can process up to FFFFFFF and does so quickly + return [scan $largeHex %x] + } + foreach hexDigit [split $largeHex {}] { + set new 0x$hexDigit + set res [expr {16*$res + $new}] + } + return $res + } + proc dec2hex {decimalNumber} { + format %4.4llX $decimalNumber + } + + #punk::lib::trimzero + proc trimzero {number} { + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + #todo - consider human numeric split + #e.g consider SI suffixes k|KMGTPEZY in that order + + #in this context, for natural sorting - numeric segments don't contain underscores or other punctuation such as . - + etc. + #review - what about unicode equivalents such as wide numerals \UFF10 to \UFF19? unicode normalization? + proc split_numeric_segments {name} { + set segments [list] + while {[string length $name]} { + if {[scan $name {%[0-9]%n} chunk len] == 2} { + lappend segments $chunk + set name [string range $name $len end] + } + if {[scan $name {%[^0-9]%n} chunk len] == 2} { + lappend segments $chunk + set name [string range $name $len end] + } + } + return $segments + } + + proc padleft {str count {ch " "}} { + set val [string repeat $ch $count] + append val $str + set diff [expr {max(0,$count - [string length $str])}] + set offset [expr {max(0,$count - $diff)}] + set val [string range $val $offset end] + } + + + # Sqlite may have limited collation sequences available in default builds. + # with custom builds - there may be others such as 'natsort' - see https://sqlite.org/forum/forumpost/e4dc6f3331 + # This is of limited use with the few builtin collations available in 2023 ie binary,nocase & rtrim + # but may provide a quicker,flexible sort option, especially if/when more collation sequences are added to sqlite + # There are also prebuilt packages such as sqlite3-icu which allows things like "SELECT icu_load_collation('en_AU', 'australian');" + proc sort_sqlite {stringlist args} { + package require sqlite3 + + + set args [check_flags -caller natsort_sqlite -defaults [list -db :memory: -collate nocase -winlike 0 -topchars "\uFFFF" -debug 0 -splitchars [list / . - _] -extras {all}] -values $args] + set db [string trim [dict get $args -db]] + set collate [string trim [dict get $args -collate]] + set debug [string trim [dict get $args -debug]] + set topchars [string trim [dict get $args -topchars]] + + set topdot [expr {"." in $topchars}] + set topunderscore [expr {"_" in $topchars}] + + + sqlite3 db_sort_basic $db + set orderedlist [list] + db_sort_basic eval [string map [list %collate% $collate] {create table sqlitesort(index0 text COLLATE %collate%, name text COLLATE %collate%)}] + foreach nm $stringlist { + set segments [split_numeric_segments $nm] + set index "" + set s 0 + foreach seg $segments { + if {($s == 0) && ![string length [string trim $seg]]} { + #don't index leading space + } elseif {($s == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { + append index "[padleft "0" 5]-d -100 topunderscore " + append index [string trim $seg] + } elseif {($s == 0) && ($topdot) && [string match .* [string trim $seg]]} { + append index "[padleft "0" 5]-d -50 topdot " + append index [string trim $seg] + } else { + if {[string is digit [string trim $seg]]} { + set basenum [trimzero [string trim $seg]] + set lengthindex "[padleft [string length $basenum] 5]-d" + append index "$lengthindex " + #append index [padleft $basenum 40] + append index $basenum + } else { + append index [string trim $seg] + } + } + incr s + } + puts stdout ">>$index" + db_sort_basic eval {insert into sqlitesort values($index,$nm)} + } + db_sort_basic eval [string map [list %collate% $collate] {select name from sqlitesort order by index0 COLLATE %collate% ASC, name COLLATE %collate% ASC }] { + lappend orderedlist $name + } + db_sort_basic close + return $orderedlist + } + + proc get_leading_char_count {str char} { + #todo - something more elegant? regex? + set count 0 + foreach c [split $str "" ] { + if {$c eq $char} { + incr count + } else { + break + } + } + return $count + } + proc stacktrace {} { + set stack "Stack trace:\n" + for {set i 1} {$i < [info level]} {incr i} { + set lvl [info level -$i] + set pname [lindex $lvl 0] + append stack [string repeat " " $i]$pname + + if {![catch {info args $pname} pargs]} { + foreach value [lrange $lvl 1 end] arg $pargs { + + if {$value eq ""} { + if {$arg != 0} { + info default $pname $arg value + } + } + append stack " $arg='$value'" + } + } else { + append stack " !unknown vars for $pname" + } + + append stack \n + } + return $stack + } + + proc get_char_count {str char} { + #faster than lsearch on split for str of a few K + expr {[tcl::string::length $str]-[tcl::string::length [tcl::string::map "$char {}" $str]]} + } + + proc build_key {chunk splitchars topdict tagconfig debug} { + variable stacktrace_on + if {$stacktrace_on} { + puts stderr "+++>[stacktrace]" + } + + set index_map [list - "" _ ""] + #e.g - need to maintain the order + #a b.txt + #a book.txt + #ab.txt + #abacus.txt + + + set original_splitchars [dict get $tagconfig original_splitchars] + + # tag_dashes test moved from loop - review + set tag_dashes 0 + if {![string length [dict get $tagconfig last_part_text_tag]]} { + #winlike + set tag_dashes 1 + } + if {("-" ni $original_splitchars)} { + set tag_dashes 1 + } + if {$debug >= 3} { + puts stdout "START build_key chunk : $chunk" + puts stdout "START build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + } + + + ## index_map will have no effect if we've already split on the char anyway(?) + #foreach m [dict keys $index_map] { + # if {$m in $original_splitchars} { + # dict unset index_map $m + # } + #} + + #if {![string length $chunk]} return + + set result "" + if {![llength $splitchars]} { + #no more structural splits - but we need to examine numeric/non-numeric segments at the lowest level. + # we are at a leaf in the recursive split hierarchy + + set s "" ;#we never actually split on "" (unless that was put in splitchars.. but it probably shouldn't be) + set parts [list $chunk] ;#important to treat as list or leading/trailing whitespace lost + + + } else { + set s [lindex $splitchars 0] + if {"spudbucket$s" in "[split $chunk {}]"} { + error "dead-branch spudbucket" + set partindex [build_key $chunk [lrange $splitchars 1 end] $topdict $tagconfig $debug] + if {[dict get $tagconfig showsplits]} { + set pfx "(1${s}=)" ;# = sorts before _ + set partindex ${pfx}$partindex + } + + return $partindex + } else { + set parts_below_index "" + + if {$s ni [split $chunk ""]} { + #$s can be an empty string + set parts [list $chunk] + } else { + set parts [split $chunk $s] ;#whitespace preserved - even if splitting on s that is not in string. + } + #assert - we have a splitchar $s that is in the chunk - so at least one part + if {(![string length $s] || [llength $parts] == 0)} { + error "buld_key assertion false empty split char and/or no parts" + } + + set pnum 1 ;# 1 based for clarity of reading index in debug output + set subpart_count [llength $parts] + + set sub_splits [lrange $splitchars 1 end] ;#pass same splitchars to each subpart + foreach p $parts { + set partindex [build_key $p $sub_splits $topdict $tagconfig $debug] + set lastpart [expr {$pnum == $subpart_count}] + + + ####################### + set showsplits [dict get $tagconfig showsplits] + #split prefixing experiment - maybe not suitable for general use - as it affects sort order + #note that pfx must be consistent until last one, no matter how many partnumbers there are in total. + # we don't want to influence sort order before reaching end. + #e.g for: + #(1.=)... + #(1._)...(2._)...(3.=) + #(1._)...(2.=) + #Note that this is probably more suitable for highly structure dependant sorts where the results are maybe less.. natural. + if {$showsplits} { + if {$lastpart} { + set pfx "(${pnum}${s}_" + #set pfx "(${pnum}${s}=)" ;# = sorts before _ + } else { + set pfx "(${pnum}${s}_" + } + append parts_below_index $pfx + } + ####################### + + if {$lastpart} { + if {[string length $p] && [string is digit $p]} { + set last_part_tag "<22${s}>" + } else { + set last_part_tag "<33${s}>" + } + + set last_part_text_tag [dict get $tagconfig last_part_text_tag] + #for -winlike 1 there is no tag configured. Windows explorer likes to put things in the order: + # module-0.1.1.tm + # module-0.1.1.2.tm + # module-0.1.tm + # arguably -winlike 0 is more natural/human + # module-0.1.tm + # module-0.1.1.tm + # module-0.1.1.2.tm + + if {[string length $last_part_text_tag]} { + #replace only the first text-tag (<30>) from the subpart_index + if {[string match "<30?>*" $partindex]} { + #give textual string index a specific tag for last part in split only. e.g <130> for lower than integers + set partindex "<130>[string range $partindex 5 end]" + } + #append parts_below_index $last_part_tag + } + #set partindex $last_part_tag$partindex + + + } + append parts_below_index $partindex + + + + if {$showsplits} { + if {$lastpart} { + set suffix "${pnum}${s}=)" ;# = sorts before _ + } else { + set suffix "${pnum}${s}_)" + } + append parts_below_index $suffix + } + + + incr pnum + } + append parts_below_index "" ;# don't add anything at the tail that may perturb sort order + + if {$debug >= 3} { + set pad [string repeat " " 20] + puts stdout "END build_key chunk : $chunk " + puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + puts stdout "END build_key ret below_index: $parts_below_index" + } + return $parts_below_index + + + } + } + + + + #puts stdout ">>>chunk:'$chunk'<<< split-on:$s parts: '$parts' splitchars: $splitchars -topdict:$topdict" + + + + + + #if {$chunk eq ""} { + # puts "___________________________________________!!!____" + #} + #puts stdout "-->chunk:$chunk $s parts:$parts" + + #puts stdout "---chunk:'$chunk' part:'$part' parts:'$parts' s:'$s'" + + + + + set segments [split_numeric_segments $chunk] ;#! + set stringindex "" + set segnum 0 + foreach seg $segments { + #puts stdout "=================---->seg:$seg segments:$segments" + #-strict ? + if {[string length $seg] && [string is digit $seg]} { + set basenum [trimzero [string trim $seg]] + set lengthindex "[padleft [string length $basenum] 4]d" + #append stringindex "<20>$lengthindex $basenum $seg" + } else { + set c1 [string range $seg 0 0] + #puts stdout "==============> c1'$c1' topdict: $topdict stringindex:$stringindex" + + if {$c1 in [dict keys $topdict]} { + set tag [dict get $topdict $c1] + #append stringindex "${tag}$c1" + #set seg [string range $seg 1 end] + } + #textindex + set leader "<30>" + set idx $seg + set idx [string trim $idx] + set idx [string tolower $idx] + set idx [string map $index_map $idx] + + + + + + #set the X-c count to match the length of the index - not the raw data + set lengthindex "[padleft [string length $idx] 4]c" + + #append stringindex "${leader}$idx $lengthindex $texttail" + } + } + + if {[llength $parts] != 1} { + error "build_key assertion fail llength parts != 1 parts:$parts" + } + + set segtail_clearance_buffer " " ;#space to clear other split indicators if using showsplits + set segtail $segtail_clearance_buffer + append segtail "\[" + set grouping "" + set pnum 0 + foreach p $parts { + set sublen_list [list] + set subsegments [split_numeric_segments $p] + set i 0 + + set partsorter "" + foreach sub $subsegments { + ##don't trim spaces here - it would be inconsistent. Some subs are pure whitespace - others have internal whitespace. e.g "a4 400b a b2" becomes "a 4 { } 400 {b a b} 2" + #mapping away all whitespace would be consistent, but not necessarily desirable. If it's in the index_map it'll happen anyway - so we don't do it manually here except for evaluating conditions. + set test_trim [string trim $sub] + set str $sub + set str [string tolower $str] + set str [string map $index_map $str] + if {[string length $test_trim] && [string is digit $test_trim]} { + append partsorter [trimzero $str] + } else { + append partsorter "$str" + } + append partsorter + } + + + foreach sub $subsegments { + + if {[string length $sub] && [string is digit $sub]} { + set basenum [trimzero [string trim $sub]] + set subequivs $basenum + set lengthindex "[padleft [string length $subequivs] 4]d " + set idx "$lengthindex [padleft $basenum 10]" ;#todo - cycle through data and determine longest + set tail [overtype::left [string repeat " " 10] $sub] + #set tail "" + } else { + set idx "" + + + set lookahead [lindex $subsegments $i+1] + if {![string length $lookahead]} { + set zeronum "[padleft 0 4]d0" + } else { + set zeronum "" + } + set subequivs $sub + #set subequivs [string trim $subequivs] + set subequivs [string tolower $subequivs] + set subequivs [string map $index_map $subequivs] + + append idx $subequivs + append idx $zeronum + + set idx $subequivs + + + # + + set ch "-" + if {$tag_dashes} { + #puts stdout "____TAG DASHES" + #winlike + set numleading [get_leading_char_count $seg $ch] + if {$numleading > 0} { + set texttail "<31-leading[padleft $numleading 4]$ch>" + } else { + set texttail "<30>" + } + set numothers [expr {[get_char_count $seg $ch] - $numleading}] + if {$debug >= 2} { + puts stdout "____dashcount: [get_char_count $seg $ch] numothers: $numothers" + } + if {$numothers > 0} { + append texttail "<31-others[padleft $numothers 4]$ch>" + } else { + append textail "<30>" + } + } else { + set texttail "<30>" + } + + + + + #set idx $partsorter + set tail "" + #set tail [string tolower $sub] ;#raw + #set tail $partsorter + #append tail ":[string tolower $p]" ;#we need the whole part - even though it makes the index much larger. !todo - tagconfig switch to toggle case sensitive sorting + } + + append grouping "$idx $tail|$s" + incr i + } + + + + + + if {$p eq ""} { + # no subsegments.. + set zeronum "[padleft 0 4]d0" + #append grouping "\u000$zerotail" + append grouping ".$zeronum" + } + + #append grouping | + #append grouping $s + #foreach len $sublen_list { + # append segtail "<[padleft $len 3]>" + #} + incr pnum + } + set grouping [string trimright $grouping $s] + append grouping "[padleft [llength $parts] 4]" + append segtail $grouping + + + #append segtail " <[padleft [llength $parts] 4]>" + + append segtail "\]" + + + #if {[string length $seg] && [string is digit $seg]} { + # append segtail "<20>" + #} else { + # append segtail "<30>" + #} + append stringindex $segtail + + incr segnum + + + + + lappend indices $stringindex + + if {[llength $indices] > 1} { + puts stderr "INDICES [llength $indices]: $stringindex" + error "build_key assertion error deadconcept indices" + } + + #topchar handling on splitter characters + #set c1 [string range $chunk 0 0] + if {$s in [dict keys $topdict]} { + set tag [dict get $topdict $s] + set joiner [string map [list ">" "$s>"] ${tag}] + #we have split on this character $s so if the first part is empty string then $s was a leading character + # we need to bring a tag out front for this, or it will be dominated by the leading sections-remaing tag + # (since the empty string produces no tag of it's own - ?) + if {[string length [lindex $parts 0]] == 0} { + set prefix ${joiner} + } else { + set prefix "" + } + } else { + #use standard character-data positioning tag if no override from topdict + set joiner "<30J>$s" + set prefix "" + } + + + set contentindex $prefix[join $indices $joiner] + if {[string length $s]} { + set split_indicator "" + } else { + set split_indicator "" + + } + if {![string length $s]} { + set s ~ + } + + #return "[overtype::left [string repeat { } 80] $contentindex][overtype::left [string repeat { } 10] [list $s $chunk]]" + #return $contentindex$split_indicator + #return [overtype::left [string repeat - 40] $contentindex] + + if {$debug >= 3} { + puts stdout "END build_key chunk : $chunk" + puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" + puts stdout "END build_key ret contentidx : $contentindex" + } + return $contentindex + } + + #---------------------------------------- + #line-processors - data always last argument - opts can be empty string + #all processor should accept empty opts and ignore opts if they don't use them + proc _lineinput_as_tcl1 {opts line} { + set out "" + foreach i $line { + append out "$i " + } + set out [string range $out 0 end-1] + return $out + } + #should be equivalent to above + proc _lineinput_as_tcl {opts line} { + return [concat {*}$line] + } + #will put extra tcl quoting if it was already tcl-shaped e.g text "a" -> {"a"} + proc _lineoutput_as_tcl {opts line} { + return [regexp -inline -all {\S+} $line] + } + + proc _lineinput_as_raw {opts line} { + return $line + } + proc _lineoutput_as_raw {opts line} { + return $line + } + + #words is opposite of tcl + proc _lineinput_as_words {opts line} { + #wordlike_parts + return [regexp -inline -all {\S+} $line] + } + proc _lineoutput_as_words {opts line} { + return [concat {*}$line] + } + + #opts same as tcllib csv::split - except without the 'line' element + #?-alternate? ?sepChar? ?delChar? + proc _lineinput_as_csv {opts line} { + package require csv + if {[lindex $opts 0] eq "-alternate"} { + return [csv::split -alternate $line {*}[lrange $opts 1 end]] + } else { + return [csv::split $line {*}$opts] + } + } + #opts same as tcllib csv::join + #?sepChar? ?delChar? ?delMode? + proc _lineoutput_as_csv {opts line} { + package require csv + return [csv::join $line {*}$opts] + } + #---------------------------------------- + variable sort_flagspecs + set sort_flagspecs [dict create\ + -caller natsort::sort \ + -return supplied|defaults \ + -defaults [list -collate nocase \ + -winlike 0 \ + -splits "\uFFFF" \ + -topchars {. _} \ + -showsplits 1 \ + -sortmethod ascii \ + -collate "\uFFFF" \ + -inputformat raw \ + -inputformatapply {index data} \ + -inputformatoptions "" \ + -outputformat raw \ + -outputformatoptions "" \ + -cols "\uFFFF" \ + -debug 0 -db "" -stacktrace 0 -splits "\uFFFF" -showsplits 0] \ + -required {all} \ + -extras {none} \ + -commandprocessors {}\ + ] + + proc sort {stringlist args} { + #puts stdout "natsort::sort args: $args" + variable debug + variable sort_flagspecs + if {![llength $stringlist]} return + if {[llength $stringlist] == 1} { + if {"-inputformat" ni $args && "-outputformat" ni $args} { + return $stringlist + } + } + + #allow pass through of the check_flags flag -debugargs so it can be set by the caller + set debugargs 0 + if {[set posn [lsearch $args -debugargs]] >=0} { + if {$posn == [llength $args]-1} { + #-debugargs at tail of list + set debugargs 1 + } else { + set debugargs [lindex $args $posn+1] + } + } + + #-return flagged|defaults doesn't work Review. + #flagfilter global processor/allocator not working 2023-08 + + set opts [check_flags {*}$sort_flagspecs -debugargs $debugargs -values $args] + + #we can only shortcircuit input list of single element at this point if there aren't non-default -inputformat or -outputformat transformations + if {[llength $stringlist] == 1} { + set is_basic 1 + foreach fname [list -inputformat -outputformat] { + if {[dict get $sort_flagspecs -defaults $fname] ne [dict get $opts $fname]} { + set is_basic 0 + break + } + } + if {$is_basic} { + return $stringlist + } + } + + + set winlike [dict get $opts -winlike] + set topchars [dict get $opts -topchars] + set cols [dict get $opts -cols] + set debug [dict get $opts -debug] + set stacktrace [dict get $opts -stacktrace] + set showsplits [dict get $opts -showsplits] + set splits [dict get $opts -splits] + set sortmethod [dict get $opts -sortmethod] + set opt_collate [dict get $opts -collate] + set opt_inputformat [dict get $opts -inputformat] + set opt_inputformatapply [dict get $opts -inputformatapply] + set opt_inputformatoptions [dict get $opts -inputformatoptions] + set opt_outputformat [dict get $opts -outputformat] + set opt_outputformatoptions [dict get $opts -outputformatoptions] + + if {$debug} { + #dict unset opts -showsplits + #dict unset opts -splits + puts stdout "natsort::sort processed_args: $opts" + if {$debug == 1} { + puts stdout "natsort::sort - try also -debug 2, -debug 3" + } + } + + #set sortmethod "-dictionary" ;# sorts a2b before a001b - possibly other strangenesses that are hard to reason about + switch -- $sortmethod { + dictionary - ascii { + set sortmethod "-$sortmethod" + # -ascii is default for tcl lsort. + } + default { + set sortmethod "-ascii" + } + } + + set allowed_collations [list nocase] + if {$opt_collate ne "\uFFFF"} { + if {$opt_collate ni $allowed_collations} { + error "natsort::sort unknown value for -collate option. Only acceptable value(s): $allowed_collations" + } + set nocaseopt "-$opt_collate" + } else { + set nocaseopt "" + } + set allowed_inputformats [list tcl raw csv words] + switch -- $opt_inputformat { + tcl - raw - csv - words {} + default { + error "natsort::sort unknown value for -inputformat option. Only acceptable value(s): $allowed_inputformats" + } + } + set allowed_outputformats [list tcl raw csv words] + switch -- $opt_outputformat { + tcl - raw - csv - words {} + default { + error "natsort::sort unknown value for -outputformat option. Only acceptable value(s): $allowed_outputformats" + } + } + + # + set winsplits [list / . _] + set commonsplits [list / . _ -] + #set commonsplits [list] + + set tagconfig [dict create] + dict set tagconfig last_part_text_tag "<19>" + if {$winlike} { + set splitchars $winsplits + #windows explorer sorts leading spaces at the top - which doesn't seem very helpful for keeping things together - but the explorer doesn't seem able to create leading spaces anyway. + set wintop [list "(" ")" { } {.} {_}] ;#windows specific order + foreach t $topchars { + if {$t ni $wintop} { + lappend wintop $t + } + } + set topchars $wintop + dict set tagconfig last_part_text_tag "" + } else { + set splitchars $commonsplits + } + if {$splits ne "\uFFFF"} { + set splitchars $splits + } + dict set tagconfig original_splitchars $splitchars + dict set tagconfig showsplits $showsplits + + #create topdict + set i 0 + set topdict [dict create] + foreach c $topchars { + incr i ;#start at 01 so that 00 reserved for final-split tag (allows x-0.1.txt to sort above x-0.1.1.txt by default. Use tagconfig to change, or choose -winlike 1 for explorer-like sorting) + dict set topdict $c "<0$i>" + } + set keylist [list] + + switch -- $opt_inputformat { + tcl { + set lineinput_transform [list _lineinput_as_tcl $opt_inputformatoptions] + } + csv { + set lineinput_transform [list _lineinput_as_csv $opt_inputformatoptions] + } + raw { + set lineinput_transform [list _lineinput_as_raw $opt_inputformatoptions] + } + words { + set lineinput_transform [list _lineinput_as_words $opt_inputformatoptions] + } + } + switch -- $opt_outputformat { + tcl { + set lineoutput_transform [list _lineoutput_as_tcl $opt_outputformatoptions] + } + csv { + set lineoutput_transform [list _lineoutput_as_csv $opt_outputformatoptions] + } + raw { + set lineoutput_transform [list _lineoutput_as_raw $opt_outputformatoptions] + } + words { + set lineoutput_transform [list _lineoutput_as_words $opt_outputformatoptions] + } + } + + if {("data" in $opt_inputformatapply) || ("index" in $opt_inputformatapply)} { + if {$opt_inputformat eq "raw"} { + set tf_stringlist $stringlist + } else { + set tf_stringlist [list] + foreach v $stringlist { + lappend tf_stringlist [{*}$lineinput_transform $v] + } + } + if {"data" in $opt_inputformatapply} { + set tf_data_stringlist $tf_stringlist + } else { + set tf_data_stringlist $stringlist + } + if {"index" in $opt_inputformatapply} { + set tf_index_stringlist $tf_stringlist + } else { + set tf_index_stringlist $stringlist + } + } else { + set tf_data_stringlist $stringlist + set tf_index_stringlist $stringlist + } + + + + if {$stacktrace} { + puts stdout [natsort::stacktrace] + set natsort::stacktrace_on 1 + } + if {$cols eq "\uFFFF"} { + set colkeys [lmap v $stringlist {}] + } else { + set colkeys [list] + foreach v $tf_index_stringlist { + set lineparts $v + set k [list] + foreach c $cols { + lappend k [lindex $lineparts $c] + } + lappend colkeys [join $k "_"] ;#use a common-split char - Review + } + } + #puts stdout "colkeys: $colkeys" + + if {$opt_inputformat eq "raw"} { + #no inputformat was applied - can just use stringlist + foreach value $stringlist ck $colkeys { + set contentindex [build_key $value $splitchars $topdict $tagconfig $debug] + set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] + lappend keylist ${colindex}-${contentindex}-$value ;#note: entire raw value used for final sort disambiguation (can be whitespace that was ignored in indexing) + } + } else { + foreach keyinput $tf_index_stringlist datavalue $tf_data_stringlist ck $colkeys { + #data may or may not have been transformed + #column index may or may not have been built with transformed data + + set contentindex [build_key $keyinput $splitchars $topdict $tagconfig $debug] + set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] + lappend keylist ${colindex}-${contentindex}-$datavalue ;#note: entire value used for final sort disambiguation (can be whitespace that was ignored in indexing) + } + } + #puts stderr "keylist: $keylist" + + ################################################################################################### + # Use the generated keylist to do the actual sorting + # select either the transformed or raw data as the corresponding output + ################################################################################################### + if {[string length $nocaseopt]} { + set sortcommand [list lsort $sortmethod $nocaseopt -indices $keylist] + } else { + set sortcommand [list lsort $sortmethod -indices $keylist] + } + if {$opt_outputformat eq "raw"} { + #raw output means no further transformations - it doesn't mean there wasn't a transform applied on the input side + #use the tf_data_stringlist in the output - which will be the same as the input stringlist if no input transform applied for data. + #(Also - it may or may not have been *sorted* on transformed data depending on whether 'index' was in $opt_inputformatapply) + foreach idx [{*}$sortcommand] { + lappend result [lindex $tf_data_stringlist $idx] + } + } else { + #we need to apply an output format + #The data may or may not have been transformed at input + foreach idx [{*}$sortcommand] { + lappend result [{*}$lineoutput_transform [lindex $tf_data_stringlist $idx]] + } + } + ################################################################################################### + + + + + + if {$debug >= 2} { + set screen_width 250 + set max_val 0 + set max_idx 0 + ##### calculate colum widths + foreach i [{*}$sortcommand] { + set len_val [string length [lindex $stringlist $i]] + if {$len_val > $max_val} { + set max_val $len_val + } + set len_idx [string length [lindex $keylist $i]] + if {$len_idx > $max_idx} { + set max_idx $len_idx + } + } + #### + set l_width [expr {$max_val + 1}] + set leftcol [string repeat " " $l_width] + set r_width [expr {$screen_width - $l_width - 1}] + set rightcol [string repeat " " $r_width] + set str [overtype::left $leftcol RAW] + puts stdout " $str Index with possibly transformed data at tail" + foreach i [{*}$sortcommand] { + #puts stdout "|d> [overtype::left $leftcol [lindex $stringlist $i] ] [lindex $keylist $i]" + set index [lindex $keylist $i] + set len_idx [string length $index] + set rowcount [expr {$len_idx / $r_width}] + if {($len_idx % $r_width) > 0} { + incr rowcount + } + set rows [list] + for {set r 0} {$r < $rowcount} {incr r} { + lappend rows [string range $index 0 $r_width-$r] + set index [string range $index $r_width end] + } + + set r 0 + foreach idxpart $rows { + if {$r == 0} { + #use the untransformed stringlist + set str [overtype::left $leftcol [lindex $stringlist $i]] + } else { + set str [overtype::left $leftcol ...]] + } + puts stdout " $str $idxpart" + incr r + } + #puts stdout "|> '[lindex $stringlist $i]'" + #puts stdout "|> [lindex $keylist $i]" + } + + puts stdout "|debug> topdict: $topdict" + puts stdout "|debug> splitchars: $splitchars" + } + return $result + } + + + + #Note that although leading whitespace isn't a commonly used feature of filesystem names - it's possible at least on FreeBSD,windows and linux so we should try to handle it sensibly. + proc sort_experiment {stringlist args} { + package require sqlite3 + + variable debug + set args [check_flags -caller natsort::sort \ + -defaults [dict create -db :memory: -collate nocase -nullvalue "->NULL<" -winlike 0 -topchars [list] -debug 0] \ + -extras {all} \ + -values $args] + set db [string trim [dict get $args -db]] + set collate [string trim [dict get $args -collate]] + set winlike [string trim [dict get $args -winlike]] + set debug [string trim [dict get $args -debug]] + set nullvalue [string trim [dict get $args -nullvalue]] + + + set topchars [string trim [dict get $args -topchars]] + + set topdot [expr {"." in $topchars}] + set topunderscore [expr {"_" in $topchars}] + + + sqlite3 db_natsort2 $db + #-- + #our table must handle the name with the greatest number of numeric/non-numeric splits. + #This means a single list member with pathological naming e.g a1a1a1a1a1a1a1a1a1a1a1.txt could greatly extend the number of columns and indices and affect performance. + #review: could be optimised to aggregate the tail into a single index, as the the extra columns won't assist in ordering, but make the table and query bigger. + # we should probably determine the longest common sequence of splits in the input list and add only one more index for the segment after that. + set maxsegments 0 + #-- + set prefix "idx" + + #note - there will be more columns in the sorting table than segments. + # (a segment equals one of the numeric & non-numeric string portions returned from 'split_numeric_sgements') + #--------------------------- + # consider + # a123b.v1.2.txt + # a123b.v1.3beta1.txt + # these have the following segments: + # a 123 b.v 1 . 2 .txt + # a 123 b.v 1 . 3 beta 1 .txt + #--------------------------- + # The first string has 7 segments (numbered 0 to 6) + # the second string has 9 segments + # + # for example when the data has any elements in a segment position that are numeric (e.g 0001 123) + # - then an index column with numeric equivalents will be created (e.g 0001 becomes 1), and any non-numeric values in that column will get mapped to a negative value (for special cases) or a high value such as NULL (with NULLS LAST sql support) + # + # when a segment + + #cycle through all strings - we cannot build tabledef as we go because the column type depends on whether all segments for a particular column are text vs int-equivalent. + array set segmentinfo {} + foreach nm $stringlist { + set segments [split_numeric_segments $nm] + if {![string length [string trim [lindex $segments 0]]]} { + if {[string is digit [string trim [lindex $segments 1]]]} { + #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) + set segments [lrange $segments 1 end] + } + } + + + set c 0 ;#start of index columns + if {[llength $segments] > $maxsegments} { + set maxsegments [llength $segments] + } + foreach seg $segments { + set seg [string trim $seg] + set column_exists [info exists segmentinfo($c,type)] + if {[string is digit $seg]} { + if {$column_exists} { + #override it (may currently be text or int) + set segmentinfo($c,type) "int" + } else { + #new column + set segmentinfo($c,name) ${prefix}$c + set segmentinfo($c,type) "int" + } + } else { + #text never overrides int + if {!$column_exists} { + set segmentinfo($c,name) ${prefix}$c + set segmentinfo($c,type) "text" + } + } + incr c + } + } + if {$debug} { + puts stdout "Largest number of num/non-num segments in data: $maxsegments" + #parray segmentinfo + } + + # + set tabledef "" + set ordered_column_names [list] + set ordered_segmentinfo_tags [lsort -dictionary [array names segmentinfo *]] + foreach k $ordered_segmentinfo_tags { + lassign [split $k ,] c tag + if {$tag eq "type"} { + set type [set segmentinfo($k)] + if {$type eq "int"} { + append tabledef "$segmentinfo($c,name) int," + } else { + append tabledef "$segmentinfo($c,name) text COLLATE $collate," + } + append tabledef "raw$c text COLLATE $collate," + lappend ordered_column_names $segmentinfo($c,name) + lappend ordered_column_names raw$c ;#additional index column not in segmentinfo + } + if {$tag eq "name"} { + #lappend ordered_column_names $segmentinfo($k) + } + } + append tabledef "name text" + + #puts stdout "tabledef:$tabledef" + + + db_natsort2 eval [string map [list %tabledef% $tabledef] {create table natsort(%tabledef%)}] + + + foreach nm $stringlist { + array unset intdata + array set intdata {} + array set rawdata {} + #init array and build sql values string + set sql_insert "insert into natsort values(" + for {set i 0} {$i < $maxsegments} {incr i} { + set intdata($i) "" + set rawdata($i) "" + append sql_insert "\$intdata($i),\$rawdata($i)," + } + append sql_insert "\$nm" ;#don't manipulate name value in any way - e.g must leave all whitespace as the sort must return exactly the same elements as in the original list. + append sql_insert ")" + + set segments [split_numeric_segments $nm] + if {![string length [string trim [lindex $segments 0]]]} { + if {[string is digit [string trim [lindex $segments 1]]]} { + #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) + set segments [lrange $segments 1 end] + } + } + set values "" + set c 0 + foreach seg $segments { + if {[set segmentinfo($c,type)] eq "int"} { + if {[string is digit [string trim $seg]]} { + set intdata($c) [trimzero [string trim $seg]] + } else { + catch {unset intdata($c)} ;#set NULL - sorts last + if {($c == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { + set intdata($c) -100 + } + if {($c == 0) && ($topdot) && [string match .* [string trim $seg]]} { + set intdata($c) -50 + } + } + set rawdata($c) [string trim $seg] + } else { + #pure text column + #set intdata($c) [string trim $seg] ;#ignore leading/trailing whitespace - we sort first on trimmed version, then refine with the sort on rawdata index + #catch {unset indata($c)} + set indata($c) [string trim $seg] + set rawdata($c) $seg + } + #set rawdata($c) [string trim $seg]# + #set rawdata($c) $seg + incr c + } + db_natsort2 eval $sql_insert + } + + set orderedlist [list] + + if {$debug} { + db_natsort2 eval {select * from pragma_table_info('natsort')} rowdata { + parray rowdata + } + } + set orderby "order by " + + foreach cname $ordered_column_names { + if {[string match "idx*" $cname]} { + append orderby "$cname ASC NULLS LAST," + } else { + append orderby "$cname ASC," + } + } + append orderby " name ASC" + #append orderby " NULLS LAST" ;#?? + + #e.g "order by idx0 ASC, raw0 ASC, idx1 ASC .... name ASC" + if {$debug} { + puts stdout "orderby clause: $orderby" + } + db_natsort2 eval [string map [list %orderby% $orderby] {select * from natsort %orderby%}] rowdata { + set line "- " + #parray rowdata + set columnnames $rowdata(*) + #puts stdout "columnnames: $columnnames" + #[lsort -dictionary [array names rowdata] + append line "$rowdata(name) \n" + foreach nm $columnnames { + if {$nm ne "name"} { + append line "$nm: $rowdata($nm) " + } + } + #puts stdout $line + #puts stdout "$rowdata(name)" + lappend orderedlist $rowdata(name) + } + + db_natsort2 close + return $orderedlist + } +} + + +#application section e.g this file might be linked from /usr/local/bin/natsort +namespace eval natsort { + namespace import ::flagfilter::check_flags + + proc called_directly_namematch {} { + global argv0 + #see https://wiki.tcl-lang.org/page/main+script + #trailing ... let's us resolve symlinks in last component of the path (could be something else like ___ but ... seems unlikely to collide with anything in the filesystem) + if {[info exists argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $argv0 ...]]] + } { + return 1 + } else { + #puts stdout "norm info script: [file dirname [file normalize [file join [info script] ...]]]" + #puts stdout "norm argv0 : [file dirname [file normalize [file join $argv0 ...]]]" + return 0 + } + } + #Review issues around comparing names vs using inodes (esp with respect to samba shares) + proc called_directly_inodematch {} { + global argv0 + if {[info exists argv0] + && [file exists [info script]] && [file exists $argv0]} { + file stat $argv0 argv0Info + file stat [info script] scriptInfo + expr {$argv0Info(dev) == $scriptInfo(dev) + && $argv0Info(ino) == $scriptInfo(ino)} + } else { + return 0 + } + } + + if {![interp issafe]} { + set is_namematch [called_directly_namematch] + set is_inodematch [called_directly_inodematch] + #### + #review - reliability of mechanisms to determine direct calls + # we don't want application being called when being used as a library, but we need it to run if called directly or from symlinks etc + #-- choose a policy and leave the others commented. + #set is_called_directly $is_namematch + #set is_called_directly $is_inodematch + set is_called_directly [expr {$is_namematch || $is_inodematch}] + #set is_called_directly [expr {$is_namematch && $is_inodematch}] + ### + + + #puts stdout "called_directly_name: [called_directly_namematch] called_directly_inode: [called_directly_inodematch]" + } else { + #safe interp + set is_called_directly 0 + } + + + + proc test_pass_fail_message {pass {additional ""}} { + variable test_fail_msg + variable test_pass_msg + if {$pass} { + puts stderr $test_pass_msg + } else { + puts stderr $test_fail_msg + } + puts stderr $additional + } + + variable test_fail_msg "XXXXXXXXXXXX FAIL XXXXXXXXXXXXX" + variable test_pass_msg "------------ PASS -------------" + proc test_sort_1 {args} { + package require struct::list + puts stderr "---$args" + set args [check_flags -caller natsort:test_sort_1 -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] -values $args] + + puts stderr "test_sort_1 got args: $args" + + set unsorted_input { + 2.2.2 + 2.2.2.2 + 1a.1.1 + 1a.2.1.1 + 1.12.1 + 1.2.1.1 + 1.02.1.1 + 1.002b.1.1 + 1.1.1.2 + 1.1.1.1 + } + set input { +1.1.1 +1.1.1.2 +1.002b.1.1 +1.02.1.1 +1.2.1.1 +1.12.1 +1a.1.1 +1a.2.1.1 +2.2.2 +2.2.2.2 + } + + set sorted [natsort::sort $input {*}$args] + set is_match [struct::list equal $input $sorted] + + set msg "windows-explorer order" + + test_pass_fail_message $is_match $msg + puts stdout [string repeat - 40] + puts stdout INPUT + puts stdout [string repeat - 40] + foreach item $input { + puts stdout $item + } + puts stdout [string repeat - 40] + puts stdout OUTPUT + puts stdout [string repeat - 40] + foreach item $sorted { + puts stdout $item + } + test_pass_fail_message $is_match $msg + return [expr {!$is_match}] + } + proc test_sort_showsplits {args} { + package require struct::list + + set args [check_flags -caller natsort:test_sort_1 \ + -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 1 ] \ + -extras {all} \ + -values $args] + + set input1 { + a-b.txt + a.b.c.txt + b.c-txt + } + + + set input2 { + a.b.c.txt + a-b.txt + b.c-text + } + + foreach {msg testlist } [list "custom-order" $input1 "windows-explorer (should work with -winlike 1)" $input2] { + set sorted [natsort::sort $testlist {*}$args] + set is_match [struct::list equal $testlist $sorted] + + test_pass_fail_message $is_match $msg + puts stderr "INPUT" + puts stderr "[string repeat - 40]" + foreach item $testlist { + puts stdout $item + } + puts stderr "[string repeat - 40]" + puts stderr "OUTPUT" + puts stderr "[string repeat - 40]" + foreach item $sorted { + puts stdout $item + } + + test_pass_fail_message $is_match $msg + } + + #return [expr {!$is_match}] + + } + + #tcl proc dispatch order - non flag items up front + #trailing flags are paired even if supplied as solo flags e.g -l becomes -l 1 + proc commandline_ls {args} { + set operands [list] + set posn 0 + foreach a $args { + if {![string match -* $a]} { + lappend operands $a + } else { + set flag1_posn $posn + break + } + incr posn + } + set args [lrange $args $flag1_posn end] + + + set debug 0 + set posn [lsearch $args -debug] + if {$posn > 0} { + if {[lindex $args $posn+1]} { + set debug [lindex $args $posn+1] + } + } + if {$debug} { + puts stderr "|debug>commandline_ls got $args" + } + + #if first operand not supplied - replace it with current working dir + if {[lindex $operands 0] eq "\uFFFF"} { + lset operands 0 [pwd] + } + + set targets [list] + foreach op $operands { + if {$op ne "\uFFFF"} { + set opchars [split [file tail $op] ""] + if {"?" in $opchars || "*" in $opchars} { + lappend targets $op + } else { + #actual file or dir + set targetitem $op + set targetitem [file normalize $op] + if {![file exists $targetitem]} { + if {$debug} { + puts stderr "|debug>commandline_ls Unable to access path '$targetitem'" + } + } + lappend targets $targetitem + if {$debug} { + puts stderr "|debug>commandline_ls listing for $targetitem" + } + } + } + } + set args [check_flags -caller commandline_ls \ + -return flagged|defaults \ + -debugargs 0 \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 -algorithm sort] \ + -required {all} \ + -extras {all} \ + -soloflags {-v -l} \ + -commandprocessors {} \ + -values $args ] + if {$debug} { + puts stderr "|debug>args: $args" + } + + + set algorithm [dict get $args -algorithm] + dict unset args -algorithm + + set allfolders [list] + set allfiles [list] + foreach item $targets { + if {[file exists $item]} { + if {[file type $item] eq "directory"} { + set dotfolders [glob -nocomplain -directory $item -type {d} -tail .*] + set folders [glob -nocomplain -directory $item -type {d} -tail *] + set allfolders [concat $allfolders $dotfolders $folders] + + set dotfiles [glob -nocomplain -directory $item -type {f} -tail .*] + set files [glob -nocomplain -directory $item -type {f} -tail *] + set allfiles [concat $allfiles $dotfiles $files] + } else { + #file (or link?) + set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] + set allfiles [concat $allfiles $files] + } + } else { + set folders [glob -nocomplain -directory $item -type {d} -tail [file tail $item]] + set allfolders [concat $allfolders $folders] + set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] + set allfiles [concat $allfiles $files] + } + } + + + set sorted_folders [natsort::sort $allfolders {*}$args] + set sorted_files [natsort::sort $allfiles {*}$args] + + foreach fold $sorted_folders { + puts stdout $fold + } + foreach file $sorted_files { + puts stdout $file + } + + return "-- ok printed to stdout [llength $sorted_folders] folders and [llength $sorted_files] files --" + } + + package require argp + argp::registerArgs commandline_test { + { -showsplits boolean 0} + { -stacktrace boolean 0} + { -debug boolean 0} + { -winlike boolean 0} + { -db string ":memory:"} + { -collate string "nocase"} + { -algorithm string "sort"} + { -topchars string "\uFFFF"} + { -testlist string {10 1 30 3}} + } + argp::setArgsNeeded commandline_test {-stacktrace} + proc commandline_test {test args} { + variable testlist + puts stdout "commandline_test got $args" + argp::parseArgs opts + puts stdout "commandline_test got [array get opts]" + set args [check_flags -caller natsort_commandline \ + -return flagged|defaults \ + -defaults [list -db :memory: -collate nocase -testlist $testlist -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + -values $args] + + if {[string tolower $test] in [list "1" "true"]} { + set test "sort" + } else { + if {![llength [info commands $test]]} { + error "test $test not found" + } + } + dict unset args -test + set stacktrace [dict get $args -stacktrace] + # dict unset args -stacktrace + + set argtestlist [dict get $args -testlist] + dict unset args -testlist + + + set debug [dict get $args -debug] + + set collate [dict get $args -collate] + set db [dict get $args -db] + set winlike [dict get $args -winlike] + set topchars [dict get $args -topchars] + + + puts stderr "|test>-----start natsort::$test--- input list size : [llength $argtestlist]" + #set resultlist [$test $argtestlist -db $db -collate $collate -topchars $topchars -winlike $winlike] + set resultlist [$test $argtestlist {*}$args] + foreach nm $resultlist { + puts stdout $nm + } + puts stdout "|test>-----end natsort::$test--- sorted list size: [llength $resultlist]" + return "test end" + } + proc commandline_runtests {runtests args} { + set argvals [check_flags -caller commandline_runtests \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits "\uFFFF" -runtests 1] \ + -values $args] + + puts stderr "runtests args: $argvals" + + #set runtests [dict get $argvals -runtests] + dict unset argvals -runtests + dict unset argvals -algorithm + + puts stderr "runtests args: $argvals" + #exit 0 + + set test_prefix "::natsort::test_sort_" + + if {$runtests eq "1"} { + set runtests "*" + } + + + set testcommands [info commands ${test_prefix}${runtests}] + if {![llength $testcommands]} { + puts stderr "No test commands matched -runtests argument '$runtests'" + puts stderr "Use 1 to run all tests" + set alltests [info commands ${test_prefix}*] + puts stderr "Valid tests are:" + + set prefixlen [string length $test_prefix] + foreach t $alltests { + set shortname [string range $t $prefixlen end] + puts stderr "$t = -runtests $shortname" + } + + } else { + foreach cmd $testcommands { + puts stderr [string repeat - 40] + puts stderr "calling $cmd with args: '$argvals'" + puts stderr [string repeat - 40] + $cmd {*}$argvals + } + } + exit 0 + } + proc help {args} { + puts stdout "natsort::help got '$args'" + return "Help not implemented" + } + proc natsort_pipe {args} { + #PIPELINE to take input list on stdin and write sorted list to stdout + #strip - from arglist + #set args [check_flags -caller natsort_pipeline \ + # -return all \ + # -defaults [list -db :memory: -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + # -values $args] + + + set debug [dict get $args -debug] + if {$debug} { + puts stderr "|debug> natsort_pipe got args:'$args'" + } + set algorithm [dict get $args -algorithm] + dict unset args -algorithm + + set proclist [info commands ::natsort::sort*] + set algos [list] + foreach p $proclist { + lappend algos [namespace tail $p] + } + if {$algorithm ni [list {*}$proclist {*}$algos]} { + do_error "valid sort mechanisms: $algos" 2 + } + + + set input_list [list] + while {![eof stdin]} { + if {[gets stdin line] > 0} { + lappend input_list $line + } else { + if {[eof stdin]} { + + } else { + after 10 + } + } + } + + if {$debug} { + puts stderr "|debug> received [llength $input_list] list elements" + } + + set resultlist [$algorithm $input_list {*}$args] + if {$debug} { + puts stderr "|debug> returning [llength $resultlist] list elements" + } + foreach r $resultlist { + puts stdout $r + } + #exit 0 + + } + if {($is_called_directly)} { + set cmdprocessors { + {helpfinal {match "^help$" dispatch natsort::help}} + {helpfinal {sub -topic default "NONE"}} + } + #set args [check_flags \ + # -caller test1 \ + # -debugargs 2 \ + # -return arglist \ + # -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ + # -required {none} \ + # -extras {all} \ + # -commandprocessors $cmdprocessors \ + # -values $::argv ] + interp alias {} do_filter {} ::flagfilter::check_flags + + #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld + set cmdprocessors { + {helpcmd {match "^help$" dispatch natsort::help singleopts {-v}}} + {helpcmd {sub -operand default \uFFFF singleopts {-l}}} + {lscmd {match "^ls$" dispatch natsort::commandline_ls dispatchtype tcl dispatchglobal 1 mashopts {-l -a} singleopts {-l -a} pairopts {} longopts {--color=always}}} + {lscmd {sub dir default "\uFFFF"}} + {lscmd {sub dir2 default "\uFFFF"}} + {lscmd {sub dir3 default "\uFFFF"}} + {lscmd {sub dir4 default "\uFFFF"}} + {lscmd {sub dir5 default "\uFFFF"}} + {lscmd {sub dir6 default "\uFFFF"}} + {runtests {match "^-tests$" dispatch natsort::commandline_runtests singleopts {-l}}} + {runtests {sub testname default "1" singleopts {-l}}} + {pipecmd {match "^-$" dispatch natsort::natsort_pipe dispatchtype tcl}} + } + set arglist [do_filter \ + -debugargs 0 \ + -debugargsonerror 2 \ + -caller cline_dispatch1 \ + -return all \ + -soloflags {-v -x} \ + -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ + -required {all} \ + -extras {all} \ + -commandprocessors $cmdprocessors \ + -values $::argv ] + + + #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld + set cmdprocessors { + {testcmd {match "^test$" dispatch natsort::commandline_test singleopts {-l}}} + {testcmd {sub testname default "1" singleopts {-l}}} + } + set arglist [check_flags \ + -debugargs 0 \ + -caller cline_dispatch2 \ + -return all \ + -soloflags {-v -l} \ + -defaults [list -collate nocase -algorithm sort -testlist "1 2 3 10" -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ + -required {all} \ + -extras {all} \ + -commandprocessors $cmdprocessors \ + -values $::argv ] + + + + + #set cmdprocessors [list] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors $cmdprocessors -values $::argv ] + + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {-cmd {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {{-cmd -default help} {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] + #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {ls {ls lsdir -default "\uFFFF"}} -values $::argv ] + + exit 0 + + if {$::argc} { + + } + } +} + + +package provide natsort [namespace eval natsort { + variable version + set version 0.1.1.6 +}] + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm new file mode 100644 index 00000000..ecf2cca9 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.1.tm @@ -0,0 +1,200 @@ +#JMN - api should be kept in sync with package patternlib where possible +# +package provide oolib [namespace eval oolib { + variable version + set version 0.1.1 +}] + +namespace eval oolib { + oo::class create collection { + variable o_data ;#dict + variable o_alias + constructor {} { + set o_data [dict create] + } + method info {} { + return [dict info $o_data] + } + method count {} { + return [dict size $o_data] + } + method isEmpty {} { + expr {[dict size $o_data] == 0} + } + method names {{globOrIdx {}}} { + if {[llength $globOrIdx]} { + if {[string is integer -strict $globOrIdx]} { + set idx $globOrIdx + if {$idx < 0} { + set idx "end-[expr {abs($idx + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $idx} result]} { + error "[self object] no such index : '$idx'" + } else { + return $result + } + } else { + #glob + return [lsearch -glob -all -inline [dict keys $o_data] $globOrIdx] + } + } else { + return [dict keys $o_data] + } + } + #like names but without globbing + method keys {} { + dict keys $o_data + } + method key {{posn 0}} { + if {$posn < 0} { + set posn "end-[expr {abs($posn + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $posn} result]} { + error "[self object] no such index : '$posn'" + } else { + return $result + } + } + method hasKey {key} { + dict exists $o_data $key + } + method get {} { + return $o_data + } + method items {} { + return [dict values $o_data] + } + method item {key} { + if {[string is integer -strict $key]} { + if {$key >= 0} { + set valposn [expr {(2*$key) +1}] + return [lindex $o_data $valposn] + } else { + set key "end-[expr {abs($key + 1)}]" + return [lindex $o_data $key] + #return [lindex [dict keys $o_data] $key] + } + } + if {[dict exists $o_data $key]} { + return [dict get $o_data $key] + } + } + #inverse lookup + method itemKeys {value} { + set value_indices [lsearch -all [dict values $o_data] $value] + set keylist [list] + foreach i $value_indices { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + method search {value args} { + set matches [lsearch {*}$args [dict values $o_data] $value] + if {"-inline" in $args} { + return $matches + } else { + set keylist [list] + foreach i $matches { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + } + #review - see patternlib. Is the intention for aliases to be configurable independent of whether the target exists? + method alias {newAlias existingKeyOrAlias} { + if {[string is integer -strict $newAlias]} { + error "[self object] collection key alias cannot be integer" + } + if {[string length $existingKeyOrAlias]} { + set o_alias($newAlias) $existingKeyOrAlias + } else { + unset o_alias($newAlias) + } + } + method aliases {{key ""}} { + if {[string length $key]} { + set result [list] + foreach {n v} [array get o_alias] { + if {$v eq $key} { + lappend result $n $v + } + } + return $result + } else { + return [array get o_alias] + } + } + #if the supplied index is an alias, return the underlying key; else return the index supplied. + method realKey {idx} { + if {[catch {set o_alias($idx)} key]} { + return $idx + } else { + return $key + } + } + method add {value key} { + if {[string is integer -strict $key]} { + error "[self object] collection key must not be an integer. Use another structure if integer keys required" + } + if {[dict exists $o_data $key]} { + error "[self object] col_processors object error: key '$key' already exists in collection" + } + dict set o_data $key $value + return [expr {[dict size $o_data] - 1}] ;#return index of item + } + method remove {idx {endRange ""}} { + if {[string length $endRange]} { + error "[self object] collection error: ranged removal not yet implemented.. remove one item at a time" + } + if {[string is integer -strict $idx]} { + if {$idx < 0} { + set idx "end-[expr {abs($idx+1)}]" + } + set key [lindex [dict keys $o_data] $idx] + set posn $idx + } else { + set key $idx + set posn [lsearch -exact [dict keys $o_data] $key] + if {$posn < 0} { + error "[self object] no such index: '$idx' in this collection" + } + } + dict unset o_data $key + return + } + method clear {} { + set o_data [dict create] + return + } + method reverse_the_collection {} { + #named slightly obtusely because reversing the data when there may be references held is a potential source of bugs + #the name reverse_the_collection should make it clear that the object is being modified in place as opposed to simply 'reverse' which may imply a view/copy. + #todo - consider implementing a get_reverse which provides an interface to the same collection without affecting original references, yet both allowing delete/edit operations. + set dictnew [dict create] + foreach k [lreverse [dict keys $o_data]] { + dict set dictnew $k [dict get $o_data $k] + } + set o_data $dictnew + return + } + #review - cmd as list vs cmd as script? + method map {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list {*}$cmd $v]] + } + return $seed + } + method objectmap {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list $v {*}$cmd]] + } + return $seed + } + } + +} + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.2.tm new file mode 100644 index 00000000..858c61cd --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.2.tm @@ -0,0 +1,201 @@ +#JMN - api should be kept in sync with package patternlib where possible +# +package provide oolib [namespace eval oolib { + variable version + set version 0.1.2 +}] + +namespace eval oolib { + oo::class create collection { + variable o_data ;#dict + #variable o_alias + constructor {} { + set o_data [dict create] + } + method info {} { + return [dict info $o_data] + } + method count {} { + return [dict size $o_data] + } + method isEmpty {} { + expr {[dict size $o_data] == 0} + } + method names {{globOrIdx {}}} { + if {[llength $globOrIdx]} { + if {[string is integer -strict $globOrIdx]} { + set idx $globOrIdx + if {$idx < 0} { + set idx "end-[expr {abs($idx + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $idx} result]} { + error "[self object] no such index : '$idx'" + } else { + return $result + } + } else { + #glob + return [lsearch -glob -all -inline [dict keys $o_data] $globOrIdx] + } + } else { + return [dict keys $o_data] + } + } + #like names but without globbing + method keys {} { + dict keys $o_data + } + method key {{posn 0}} { + if {$posn < 0} { + set posn "end-[expr {abs($posn + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $posn} result]} { + error "[self object] no such index : '$posn'" + } else { + return $result + } + } + method hasKey {key} { + dict exists $o_data $key + } + method get {} { + return $o_data + } + method items {} { + return [dict values $o_data] + } + method item {key} { + if {[string is integer -strict $key]} { + if {$key >= 0} { + set valposn [expr {(2*$key) +1}] + return [lindex $o_data $valposn] + } else { + set key "end-[expr {abs($key + 1)}]" + return [lindex $o_data $key] + #return [lindex [dict keys $o_data] $key] + } + } + if {[dict exists $o_data $key]} { + return [dict get $o_data $key] + } + } + #inverse lookup + method itemKeys {value} { + set value_indices [lsearch -all [dict values $o_data] $value] + set keylist [list] + foreach i $value_indices { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + method search {value args} { + set matches [lsearch {*}$args [dict values $o_data] $value] + if {"-inline" in $args} { + return $matches + } else { + set keylist [list] + foreach i $matches { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + } + #review - see patternlib. Is the intention for aliases to be configurable independent of whether the target exists? + #review - what is the point of alias anyway? - why slow down other operations when a variable can hold a keyname perfectly well? + #method alias {newAlias existingKeyOrAlias} { + # if {[string is integer -strict $newAlias]} { + # error "[self object] collection key alias cannot be integer" + # } + # if {[string length $existingKeyOrAlias]} { + # set o_alias($newAlias) $existingKeyOrAlias + # } else { + # unset o_alias($newAlias) + # } + #} + #method aliases {{key ""}} { + # if {[string length $key]} { + # set result [list] + # foreach {n v} [array get o_alias] { + # if {$v eq $key} { + # lappend result $n $v + # } + # } + # return $result + # } else { + # return [array get o_alias] + # } + #} + ##if the supplied index is an alias, return the underlying key; else return the index supplied. + #method realKey {idx} { + # if {[catch {set o_alias($idx)} key]} { + # return $idx + # } else { + # return $key + # } + #} + method add {value key} { + if {[string is integer -strict $key]} { + error "[self object] collection key must not be an integer. Use another structure if integer keys required" + } + if {[dict exists $o_data $key]} { + error "[self object] col_processors object error: key '$key' already exists in collection" + } + dict set o_data $key $value + return [expr {[dict size $o_data] - 1}] ;#return index of item + } + method remove {idx {endRange ""}} { + if {[string length $endRange]} { + error "[self object] collection error: ranged removal not yet implemented.. remove one item at a time" + } + if {[string is integer -strict $idx]} { + if {$idx < 0} { + set idx "end-[expr {abs($idx+1)}]" + } + set key [lindex [dict keys $o_data] $idx] + set posn $idx + } else { + set key $idx + set posn [lsearch -exact [dict keys $o_data] $key] + if {$posn < 0} { + error "[self object] no such index: '$idx' in this collection" + } + } + dict unset o_data $key + return + } + method clear {} { + set o_data [dict create] + return + } + method reverse_the_collection {} { + #named slightly obtusely because reversing the data when there may be references held is a potential source of bugs + #the name reverse_the_collection should make it clear that the object is being modified in place as opposed to simply 'reverse' which may imply a view/copy. + #todo - consider implementing a get_reverse which provides an interface to the same collection without affecting original references, yet both allowing delete/edit operations. + set dictnew [dict create] + foreach k [lreverse [dict keys $o_data]] { + dict set dictnew $k [dict get $o_data $k] + } + set o_data $dictnew + return + } + #review - cmd as list vs cmd as script? + method map {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list {*}$cmd $v]] + } + return $seed + } + method objectmap {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list $v {*}$cmd]] + } + return $seed + } + } + +} + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.tm new file mode 100644 index 00000000..3756fceb --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/oolib-0.1.tm @@ -0,0 +1,195 @@ +#JMN - api should be kept in sync with package patternlib where possible +# +package provide oolib [namespace eval oolib { + variable version + set version 0.1 +}] + +namespace eval oolib { + oo::class create collection { + variable o_data ;#dict + variable o_alias + constructor {} { + set o_data [dict create] + } + method info {} { + return [dict info $o_data] + } + method count {} { + return [dict size $o_data] + } + method isEmpty {} { + expr {[dict size $o_data] == 0} + } + method names {{globOrIdx {}}} { + if {[llength $globOrIdx]} { + if {[string is integer -strict $globOrIdx]} { + if {$idx < 0} { + set idx "end-[expr {abs($idx + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $idx} result]} { + error "[self object] no such index : '$idx'" + } else { + return $result + } + } else { + #glob + return [lsearch -glob -all -inline [dict keys $o_data] $globOrIdx] + } + } else { + return [dict keys $o_data] + } + } + #like names but without globbing + method keys {} { + dict keys $o_data + } + method key {{posn 0}} { + if {$posn < 0} { + set posn "end-[expr {abs($posn + 1)}]" + } + if {[catch {lindex [dict keys $o_data] $posn} result]} { + error "[self object] no such index : '$posn'" + } else { + return $result + } + } + method hasKey {key} { + dict exists $o_data $key + } + method get {} { + return $o_data + } + method items {} { + return [dict values $o_data] + } + method item {key} { + if {[string is integer -strict $key]} { + if {$key > 0} { + set valposn [expr {(2*$key) +1}] + return [lindex $o_data $valposn] + } else { + set key "end-[expr {abs($key + 1)}]" + return [lindex [dict keys $o_data] $key] + } + } + if {[dict exists $o_data $key]} { + return [dict get $o_data $key] + } + } + #inverse lookup + method itemKeys {value} { + set value_indices [lsearch -all [dict values $o_data] $value] + set keylist [list] + foreach i $value_indices { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + method search {value args} { + set matches [lsearch {*}$args [dict values $o_data] $value] + if {"-inline" in $args} { + return $matches + } else { + set keylist [list] + foreach i $matches { + set idx [expr {(($i + 1) *2) -2}] + lappend keylist [lindex $o_data $idx] + } + return $keylist + } + } + #review - see patternlib. Is the intention for aliases to be configurable independent of whether the target exists? + method alias {newAlias existingKeyOrAlias} { + if {[string is integer -strict $newAlias]} { + error "[self object] collection key alias cannot be integer" + } + if {[string length $existingKeyOrAlias]} { + set o_alias($newAlias) $existingKeyOrAlias + } else { + unset o_alias($newAlias) + } + } + method aliases {{key ""}} { + if {[string length $key]} { + set result [list] + foreach {n v} [array get o_alias] { + if {$v eq $key} { + lappend result $n $v + } + } + return $result + } else { + return [array get o_alias] + } + } + #if the supplied index is an alias, return the underlying key; else return the index supplied. + method realKey {idx} { + if {[catch {set o_alias($idx)} key]} { + return $idx + } else { + return $key + } + } + method add {value key} { + if {[string is integer -strict $key]} { + error "[self object] collection key must not be an integer. Use another structure if integer keys required" + } + if {[dict exists $o_data $key]} { + error "[self object] col_processors object error: key '$key' already exists in collection" + } + dict set o_data $key $value + return [expr {[dict size $o_data] - 1}] ;#return index of item + } + method remove {idx {endRange ""}} { + if {[string length $endRange]} { + error "[self object] collection error: ranged removal not yet implemented.. remove one item at a time" + } + if {[string is integer -strict $idx]} { + if {$idx < 0} { + set idx "end-[expr {abs($idx+1)}]" + } + set key [lindex [dict keys $o_data] $idx] + set posn $idx + } else { + set key $idx + set posn [lsearch -exact [dict keys $o_data] $key] + if {$posn < 0} { + error "[self object] no such index: '$idx' in this collection" + } + } + dict unset o_data $key + return + } + method clear {} { + set o_data [dict create] + return + } + method reverse {} { + set dictnew [dict create] + foreach k [lreverse [dict keys $o_data]] { + dict set dictnew $k [dict get $o_data $k] + } + set o_data $dictnew + return + } + #review - cmd as list vs cmd as script? + method map {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list {*}$cmd $v]] + } + return $seed + } + method objectmap {cmd} { + set seed [list] + dict for {k v} $o_data { + lappend seed [uplevel #0 [list $v {*}$cmd]] + } + return $seed + } + } + +} + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm new file mode 100644 index 00000000..42876322 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.4.tm @@ -0,0 +1,3685 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) Julian Noble 2003-2023 +# +# @@ Meta Begin +# Application overtype 1.6.4 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin overtype_module_overtype 0 1.6.4] +#[copyright "2024"] +#[titledesc {overtype text layout - ansi aware}] [comment {-- Name section and table of contents description --}] +#[moddesc {overtype text layout}] [comment {-- Description at end of page heading --}] +#[require overtype] +#[keywords module text ansi] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of overtype +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by overtype +#[list_begin itemized] + +package require Tcl 8.6- +package require textutil +package require punk::lib ;#required for lines_as_list +package require punk::ansi ;#required to detect, split, strip and calculate lengths +package require punk::char ;#box drawing - and also unicode character width determination for proper layout of text with double-column-width chars +package require punk::assertion +#*** !doctools +#[item] [package {Tcl 8.6}] +#[item] [package textutil] +#[item] [package punk::ansi] +#[para] - required to detect, split, strip and calculate lengths of text possibly containing ansi codes +#[item] [package punk::char] +#[para] - box drawing - and also unicode character width determination for proper layout of text with double-column-width chars + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +#PERFORMANCE notes +#overtype is very performance sensitive - used in ansi output all over the place ie needs to be optimised +#NOTE use of tcl::dict::for tcl::string::range etc instead of ensemble versions. This is for the many tcl 8.6/8.7 interps which don't compile ensemble commands when in safe interps +#similar for tcl::namespace::eval - but this is at least on some versions of Tcl - faster even in a normal interp. Review to see if that holds for Tcl 9. +#for string map: when there are exactly 2 elements - it is faster to use a literal which has a special case optimisation in the c code +#ie use tcl::string::map {\n ""} ... instead of tcl::string::map [list \n ""] ... +#note that we can use unicode (e.g \uFF31) and other escapes such as \t within these curly braces - we don't have to use double quotes +#generally using 'list' is preferred for the map as less error prone. +#can also use: tcl::string::map "token $var" .. but be careful regarding quoting and whitespace in var. This should be used sparingly if at all. + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section API] + + +#Julian Noble - 2003 +#Released under standard 'BSD license' conditions. +# +#todo - ellipsis truncation indicator for center,right + +#v1.4 2023-07 - naive ansi color handling - todo - fix tcl::string::range +# - need to extract and replace ansi codes? + +tcl::namespace::eval overtype { + namespace import ::punk::assertion::assert + punk::assertion::active true + + namespace path ::punk::lib + + namespace export * + variable default_ellipsis_horizontal "..." ;#fallback + variable default_ellipsis_vertical "..." + tcl::namespace::eval priv { + proc _init {} { + upvar ::overtype::default_ellipsis_horizontal e_h + upvar ::overtype::default_ellipsis_vertical e_v + set e_h [format %c 0x2026] ;#Unicode Horizontal Ellipsis + set e_v [format %c 0x22EE] + #The unicode ellipsis looks more natural than triple-dash which is centred vertically whereas ellipsis is at floorline of text + #Also - unicode ellipsis has semantic meaning that other processors can interpret + #unicode does also provide a midline horizontal ellipsis 0x22EF + + #set e [format %c 0x2504] ;#punk::char::charshort boxd_ltdshhz - Box Drawings Light Triple Dash Horizontal + #if {![catch {package require punk::char}]} { + # set e [punk::char::charshort boxd_ltdshhz] + #} + } + } + priv::_init +} +proc overtype::about {} { + return "Simple text formatting. Author JMN. BSD-License" +} + +tcl::namespace::eval overtype { + variable grapheme_widths [tcl::dict::create] + + variable escape_terminals + #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). + tcl::dict::set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] + #tcl::dict::set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic + tcl::dict::set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals + + #self-contained 2 byte ansi escape sequences - review more? + variable ansi_2byte_codes_dict + set ansi_2byte_codes_dict [tcl::dict::create\ + "reset_terminal" "\u001bc"\ + "save_cursor_posn" "\u001b7"\ + "restore_cursor_posn" "\u001b8"\ + "cursor_up_one" "\u001bM"\ + "NEL - Next Line" "\u001bE"\ + "IND - Down one line" "\u001bD"\ + "HTS - Set Tab Stop" "\u001bH"\ + ] + + #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. + # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ +} + + +#proc overtype::stripansi {text} { +# variable escape_terminals ;#dict +# variable ansi_2byte_codes_dict +# #important that we don't spend too much time on this for plain text that doesn't contain any escapes anyway +# if {[string first \033 $text] <0 && [string first \009c $text] <0} { +# #\033 same as \x1b +# return $text +# } +# +# set text [convert_g0 $text] +# +# #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. +# #line endings can theoretically occur within an ansi escape sequence (review e.g title?) +# set inputlist [split $text ""] +# set outputlist [list] +# +# set 2bytecodes [dict values $ansi_2byte_codes_dict] +# +# set in_escapesequence 0 +# #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls +# set i 0 +# foreach u $inputlist { +# set v [lindex $inputlist $i+1] +# set uv ${u}${v} +# if {$in_escapesequence eq "2b"} { +# #2nd byte - done. +# set in_escapesequence 0 +# } elseif {$in_escapesequence != 0} { +# set escseq [tcl::dict::get $escape_terminals $in_escapesequence] +# if {$u in $escseq} { +# set in_escapesequence 0 +# } elseif {$uv in $escseq} { +# set in_escapseequence 2b ;#flag next byte as last in sequence +# } +# } else { +# #handle both 7-bit and 8-bit CSI and OSC +# if {[regexp {^(?:\033\[|\u009b)} $uv]} { +# set in_escapesequence CSI +# } elseif {[regexp {^(?:\033\]|\u009c)} $uv]} { +# set in_escapesequence OSC +# } elseif {$uv in $2bytecodes} { +# #self-contained e.g terminal reset - don't pass through. +# set in_escapesequence 2b +# } else { +# lappend outputlist $u +# } +# } +# incr i +# } +# return [join $outputlist ""] +#} + + + + + +proc overtype::string_columns {text} { + if {[punk::ansi::ta::detect $text]} { + #error "error string_columns is for calculating character length of string - ansi codes must be stripped/rendered first e.g with punk::ansi::stripansi. Alternatively try punk::ansi::printing_length" + set text [punk::ansi::stripansi $text] + } + return [punk::char::ansifreestring_width $text] +} + +#todo - consider a way to merge overtype::left/centre/right +#These have similar algorithms/requirements - and should be refactored to be argument-wrappers over a function called something like overtype::renderblock +#overtype::renderblock could render the input to a defined (possibly overflowing in x or y) rectangle overlapping the underlay. +#(i.e not even necessariy having it's top left within the underlay) +tcl::namespace::eval overtype::priv { +} + +#could return larger than colwidth +proc _get_row_append_column {row} { + upvar outputlines outputlines + set idx [expr {$row -1}] + if {$row <= 1 || $row > [llength $outputlines]} { + return 1 + } else { + upvar opt_overflow opt_overflow + upvar colwidth colwidth + set existinglen [punk::ansi::printing_length [lindex $outputlines $idx]] + set endpos [expr {$existinglen +1}] + if {$opt_overflow} { + return $endpos + } else { + if {$endpos > $colwidth} { + return $colwidth + 1 + } else { + return $endpos + } + } + } +} + +tcl::namespace::eval overtype { + #*** !doctools + #[subsection {Namespace overtype}] + #[para] Core API functions for overtype + #[list_begin definitions] + + + + #tcl::string::range should generally be avoided for both undertext and overtext which contain ansi escapes and other cursor affecting chars such as \b and \r + #render onto an already-rendered (ansi already processed) 'underlay' string, a possibly ansi-laden 'overlay' string. + #The underlay and overlay can be multiline blocks of text of varying line lengths. + #The overlay may just be an ansi-colourised block - or may contain ansi cursor movements and cursor save/restore calls - in which case the apparent length and width of the overlay can't be determined as if it was a block of text. + #This is a single-shot rendering of strings - ie there is no way to chain another call containing a cursor-restore to previously rendered output and have it know about any cursor-saves in the first call. + # a cursor start position other than top-left is a possible addition to consider. + #see editbuf in punk::repl for a more stateful ansi-processor. Both systems use loops over overtype::renderline + proc renderspace {args} { + #*** !doctools + #[call [fun overtype::renderspace] [arg args] ] + #[para] usage: ?-transparent [lb]0|1[rb]? ?-overflow [lb]1|0[rb]? ?-ellipsis [lb]1|0[rb]? ?-ellipsistext ...? undertext overtext + + # @c overtype starting at left (overstrike) + # @c can/should we use something like this?: 'format "%-*s" $len $overtext + variable default_ellipsis_horizontal + + if {[llength $args] < 2} { + error {usage: ?-transparent [0|1]? ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} + } + lassign [lrange $args end-1 end] underblock overblock + set opts [tcl::dict::create\ + -bias ignored\ + -width \uFFEF\ + -height \uFFEF\ + -startcolumn 1\ + -wrap 0\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -appendlines 1\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -experimental 0\ + -looplimit \uFFEF\ + ] + #-ellipsis args not used if -wrap is true + set argsflags [lrange $args 0 end-2] + foreach {k v} $argsflags { + switch -- $k { + -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -appendlines - -transparent - -exposed1 - -exposed2 - -experimental { + tcl::dict::set opts $k $v + } + default { + error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_overflow [tcl::dict::get $opts -overflow] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### + #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. + set opt_width [tcl::dict::get $opts -width] + set opt_height [tcl::dict::get $opts -height] + set opt_startcolumn [tcl::dict::get $opts -startcolumn] + set opt_appendlines [tcl::dict::get $opts -appendlines] + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_exposed1 [tcl::dict::get $opts -exposed1] ;#widechar_exposed_left - todo + set opt_exposed2 [tcl::dict::get $opts -exposed2] ;#widechar_exposed_right - todo + # -- --- --- --- --- --- + + # ---------------------------- + # -experimental dev flag to set flags etc + # ---------------------------- + set data_mode 0 + set test_mode 1 + set info_mode 0 + set edit_mode 0 + set opt_experimental [tcl::dict::get $opts -experimental] + foreach o $opt_experimental { + switch -- $o { + test_mode { + set test_mode 1 + set info_mode 1 + } + old_mode { + set test_mode 0 + set info_mode 1 + } + data_mode { + set data_mode 1 + } + info_mode { + set info_mode 1 + } + edit_mode { + set edit_mode 1 + } + } + } + set test_mode 1 ;#try to eliminate + # ---------------------------- + + #modes + set insert_mode 0 ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l + set autowrap_mode $opt_wrap + set reverse_mode 0 + + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + + #set underlines [split $underblock \n] + + #underblock is a 'rendered' block - so width height make sense + #colwidth & colheight were originally named with reference to rendering into a 'column' of output e.g a table column - before cursor row/col was implemented. + #The naming is now confusing. It should be something like renderwidth renderheight ?? review + + if {$opt_width eq "\uFFEF" || $opt_height eq "\uFFEF"} { + lassign [blocksize $underblock] _w colwidth _h colheight + if {$opt_width ne "\uFFEF"} { + set colwidth $opt_width + } + if {$opt_height ne "\uFFEF"} { + set colheight $opt_height + } + } else { + set colwidth $opt_width + set colheight $opt_height + } + + # -- --- --- --- + #REVIEW - do we need ansi resets in the underblock? + if {$underblock eq ""} { + set underlines [lrepeat $colheight ""] + } else { + set underlines [split $underblock \n] + } + #if {$underblock eq ""} { + # set blank "\x1b\[0m\x1b\[0m" + # #set underlines [list "\x1b\[0m\x1b\[0m"] + # set underlines [lrepeat $colheight $blank] + #} else { + # #lines_as_list -ansiresets 1 will do nothing if -ansiresets 1 isn't specified - REVIEW + # set underlines [lines_as_list -ansiresets 1 $underblock] + #} + # -- --- --- --- + + #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. + #In practice an overlay ANSI stream can be a single line with ansi moves/restores etc - or even have no moves or newlines, just relying on wrapping at the output colwidth + #In such cases - we process the whole shebazzle for the first output line - only reducing by the applied amount at the head each time, reprocessing the long tail each time. + #(in cases where there are interline moves or cursor jumps anyway) + #This works - but doesn't seem efficient. + #On the other hand.. maybe it depends on the data. For simpler files it's more efficient than splitting first + + #a hack until we work out how to avoid infinite loops... + # + set looplimit [tcl::dict::get $opts -looplimit] + if {$looplimit eq "\uFFEF"} { + #looping for each char is worst case (all newlines?) - anything over that is an indication of something broken? + #do we need any margin above the length? (telnet mapscii.me test) + set looplimit [expr {[tcl::string::length $overblock] + 10}] + } + + if {!$test_mode} { + set inputchunks [split $overblock \n] + } else { + set scheme 3 + switch -- $scheme { + 0 { + #one big chunk + set inputchunks [list $overblock] + } + 1 { + set inputchunks [punk::ansi::ta::split_codes $overblock] + } + 2 { + + #split into lines if possible first - then into plaintext/ansi-sequence chunks ? + set inputchunks [list ""] ;#put an empty plaintext split in for starters + set i 1 + set lines [split $overblock \n] + foreach ln $lines { + if {$i < [llength $lines]} { + append ln \n + } + set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? + set lastpt [lindex $inputchunks end] + lset inputchunks end [tcl::string::cat $lastpt [lindex $sequence_split 0]] + lappend inputchunks {*}[lrange $sequence_split 1 end] + incr i + } + } + 3 { + #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice + set lflines [list] + set inputchunks [split $overblock \n] + foreach ln $inputchunks { + append ln \n + lappend lflines $ln + } + if {[llength $lflines]} { + lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] + } + set inputchunks $lflines[unset lflines] + + } + } + } + + + #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height + #lassign [blocksize $overblock] _w overblock_width _h overblock_height + + + set replay_codes_underlay [tcl::dict::create 1 ""] + #lappend replay_codes_overlay "" + set replay_codes_overlay "" + set unapplied "" + set cursor_saved_position [tcl::dict::create] + set cursor_saved_attributes "" + + + set outputlines $underlines + set overidx 0 + + #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext + set row 1 + if {$data_mode} { + set col [_get_row_append_column $row] + } else { + set col $opt_startcolumn + } + + set instruction_stats [tcl::dict::create] + + set loop 0 + #while {$overidx < [llength $inputchunks]} { } + + while {[llength $inputchunks]} { + #set overtext [lindex $inputchunks $overidx]; lset inputchunks $overidx "" + set overtext [lpop inputchunks 0] + if {![tcl::string::length $overtext]} { + incr loop + continue + } + #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" + set undertext [lindex $outputlines [expr {$row -1}]] + set renderedrow $row + + #renderline pads each underaly line to width with spaces and should track where end of data is + + + #set overtext [tcl::string::cat [lindex $replay_codes_overlay $overidx] $overtext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + if {[tcl::dict::exists $replay_codes_underlay $row]} { + set undertext [tcl::string::cat [tcl::dict::get $replay_codes_underlay $row] $undertext] + } + #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - + #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l + set LASTCALL [list -info 1 -insert_mode $insert_mode -autowrap_mode $autowrap_mode -transparent $opt_transparent -width $colwidth -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -cursor_column $col -cursor_row $row $undertext $overtext] + set rinfo [renderline -experimental $opt_experimental\ + -info 1\ + -insert_mode $insert_mode\ + -cursor_restore_attributes $cursor_saved_attributes\ + -autowrap_mode $autowrap_mode\ + -transparent $opt_transparent\ + -width $colwidth\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -overflow $opt_overflow\ + -cursor_column $col\ + -cursor_row $row\ + $undertext\ + $overtext\ + ] + set instruction [tcl::dict::get $rinfo instruction] + set insert_mode [tcl::dict::get $rinfo insert_mode] + set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + #set reverse_mode [tcl::dict::get $rinfo reverse_mode];#how to support in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + + + + #-- todo - detect looping properly + if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { + puts stderr "overtype::renderspace loop?" + puts [ansistring VIEW $rinfo] + break + } + #-- + + if {[tcl::dict::size $c_saved_pos] >= 1} { + set cursor_saved_position $c_saved_pos + set cursor_saved_attributes $c_saved_attributes + } + + + set overflow_handled 0 + + + + set nextprefix "" + + + #todo - handle potential insertion mode as above for cursor restore? + #keeping separate branches for debugging - review and merge as appropriate when stable + tcl::dict::incr instruction_stats $instruction + switch -- $instruction { + {} { + if {$test_mode == 0} { + incr row + if {$data_mode} { + set col [_get_row_append_column $row] + if {$col > $colwidth} { + + } + } else { + set col 1 + } + } else { + #lf included in data + set row $post_render_row + set col $post_render_col + + #set col 1 + #if {$post_render_row != $renderedrow} { + # set col 1 + #} else { + # set col $post_render_col + #} + } + } + up { + + #renderline knows it's own line number, and knows not to go above row l + #it knows that a move whilst 1-beyond the width conflicts with the linefeed and reduces the move by one accordingly. + #row returned should be correct. + #column may be the overflow column - as it likes to report that to the caller. + + #Note that an ansi up sequence after last column going up to a previous line and also beyond the last column, will result in the next grapheme going onto the following line. + #this seems correct - as the column remains beyond the right margin so subsequent chars wrap (?) review + #puts stderr "up $post_render_row" + #puts stderr "$rinfo" + + #puts stdout "1 row:$row col $col" + set row $post_render_row + #data_mode (naming?) determines if we move to end of existing data or not. + #data_mode 0 means ignore existing line length and go to exact column + #set by -experimental flag + if {$data_mode == 0} { + set col $post_render_col + } else { + #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data + #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + + #puts stdout "2 row:$row col $col" + #puts stdout "-----------------------" + #puts stdout $rinfo + #flush stdout + } + down { + if {$data_mode == 0} { + #renderline doesn't know how far down we can go.. + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set row $post_render_row + set col $post_render_col + } else { + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + + } + } + restore_cursor { + #testfile belinda.ans uses this + + #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" + if {[tcl::dict::exists $cursor_saved_position row]} { + set row [tcl::dict::get $cursor_saved_position row] + set col [tcl::dict::get $cursor_saved_position column] + #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" + #set nextprefix $cursor_saved_attributes + #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay]$cursor_saved_attributes + #set replay_codes_overlay $cursor_saved_attributes + set cursor_saved_position [tcl::dict::create] + set cursor_saved_attributes "" + } else { + #TODO + #?restore without save? + #should move to home position and reset ansi SGR? + #puts stderr "overtype::renderspace cursor_restore without save data available" + } + #If we were inserting prior to hitting the cursor_restore - there could be overflow_right data - generally the overtype functions aren't for inserting - but ansi can enable it + #if we were already in overflow when cursor_restore was hit - it shouldn't have been processed as an action - just stored. + if {!$overflow_handled && $overflow_right ne ""} { + #wrap before restore? - possible effect on saved cursor position + #this overflow data has previously been rendered so has no cursor movements or further save/restore operations etc + #we can just insert another call to renderline to solve this.. ? + #It would perhaps be more properly handled as a queue of instructions from our initial renderline call + #we don't need to worry about overflow next call (?)- but we should carry forward our gx and ansi stacks + + puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" + + set sub_info [overtype::renderline -info 1 -width $colwidth -insert_mode $insert_mode -autowrap_mode $autowrap_mode -overflow [tcl::dict::get $opts -overflow] "" $overflow_right] + set foldline [tcl::dict::get $sub_info result] + set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed.. + set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + linsert outputlines $renderedrow $foldline + #review - row & col set by restore - but not if there was no save.. + } + set overflow_handled 1 + + } + move { + ######## + if {$post_render_row > [llength $outputlines]} { + #Ansi moves need to create new lines ? + #if {$opt_appendlines} { + # set diff [expr {$post_render_row - [llength $outputlines]}] + # if {$diff > 0} { + # lappend outputlines {*}[lrepeat $diff ""] + # } + # set row $post_render_row + #} else { + set row [llength $outputlines] + #} + } else { + set row $post_render_row + } + ####### + set col $post_render_col + #overflow + unapplied? + } + lf_start { + #raw newlines - must be test_mode + # ---------------------- + #test with fruit.ans + #test - treating as newline below... + #append rendered $overflow_right + #set overflow_right "" + set row $renderedrow + incr row + if {$row > [llength $outputlines]} { + lappend outputlines "" + } + set col $opt_startcolumn + # ---------------------- + } + lf_mid { + + if 0 { + #set rhswidth [punk::ansi::printing_length $overflow_right] + #only show debug when we have overflow? + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + + set rhs "" + if {$overflow_right ne ""} { + set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $overflow_right]] + set rhs [textblock::frame -title overflow_right $rhs] + } + puts [textblock::join $lhs " $post_render_col " $rhs] + } + + if {!$test_mode} { + #rendered + append rendered $overflow_right + #set replay_codes_overlay "" + set overflow_right "" + + + set row $renderedrow + + set col $opt_startcolumn + incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + set edit_mode 0 + if {$edit_mode} { + set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] + set overflow_right "" + set unapplied "" + set row $post_render_row + #set col $post_render_col + set col $opt_startcolumn + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + append rendered $overflow_right + set overflow_right "" + set row $post_render_row + set col $opt_startcolumn + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } + } + } + lf_overflow { + #linefeed after colwidth e.g at column 81 for an 80 col width + #we may also have other control sequences that came after col 80 e.g cursor save + + if 0 { + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" + + #assertion - there should be no overflow.. + puts $lhs + } + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right + + set row $post_render_row + #set row $renderedrow + #incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + set col $opt_startcolumn + + } + newlines_above { + #we get a newlines_above instruction when received at column 1 + #In some cases we want to treat that as request to insert a new blank line above, and move our row 1 down (staying with the data) + #in other cases - we want to treat at column 1 the same as any other + + puts "--->newlines_above" + puts "rinfo: $rinfo" + #renderline doesn't advance the row for us - the caller has the choice to implement or not + set row $post_render_row + set col $post_render_col + if {$insert_lines_above > 0} { + set row $renderedrow + set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] + incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above + #? set row $post_render_row #can renderline tell us? + } + } + newlines_below { + #obsolete? - use for ANSI insert lines sequence + if {$data_mode == 0} { + puts --->nl_below + set row $post_render_row + set col $post_render_col + if {$insert_lines_below == 1} { + if {$test_mode == 0} { + set row $renderedrow + set outputlines [linsert $outputlines [expr {$renderedrow }] {*}[lrepeat $insert_lines_below ""]] ;#note - linsert can add to end too + incr row $insert_lines_below + set col $opt_startcolumn + } else { + #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] + #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] + #set rhs "" + #if {$overflow_right ne ""} { + # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] + # set rhs [textblock::frame -title overflow_right $rhs] + #} + #puts [textblock::join $lhs $rhs] + + #rendered + append rendered $overflow_right + # + + + set overflow_right "" + set row $renderedrow + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat $insert_lines_below ""] + } + incr row $insert_lines_below + set col $opt_startcolumn + + + + } + } + } else { + set row $post_render_row + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } else { + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + } + } + wrapmoveforward { + #doesn't seem to be used by fruit.ans testfile + #used by dzds.ans + #note that cursor_forward may move deep into the next line - or even span multiple lines !TODO + set c $colwidth + set r $post_render_row + if {$post_render_col > $colwidth} { + set i $c + while {$i <= $post_render_col} { + if {$c == $colwidth+1} { + incr r + if {$opt_appendlines} { + if {$r < [llength $outputlines]} { + lappend outputlines "" + } + } + set c $opt_startcolumn + } else { + incr c + } + incr i + } + set col $c + } else { + #why are we getting this instruction then? + puts stderr "wrapmoveforward - test" + set r [expr {$post_render_row +1}] + set c $post_render_col + } + set row $r + set col $c + } + wrapmovebackward { + set c $colwidth + set r $post_render_row + if {$post_render_col < 1} { + set c 1 + set i $c + while {$i >= $post_render_col} { + if {$c == 0} { + if {$r > 1} { + incr r -1 + set c $colwidth + } else { + #leave r at 1 set c 1 + #testfile besthpav.ans first line top left border alignment + set c 1 + break + } + } else { + incr c -1 + } + incr i -1 + } + set col $c + } else { + puts stderr "Wrapmovebackward - but postrendercol >= 1???" + } + set row $r + set col $c + } + overflow { + #normal single-width grapheme overflow + #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" + set row $post_render_row ;#renderline will not advance row when reporting overflow char + if {$autowrap_mode} { + incr row + set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? + } else { + #this works for test_mode (which should become the default) - but could give a bad result otherwise - review - add tests fix. + set col $post_render_col + #set unapplied "" ;#this seems wrong? + #set unapplied [tcl::string::range $unapplied 1 end] + #The overflow can only be triggered by a grapheme (todo cluster?) - but our unapplied could contain SGR codes prior to the grapheme that triggered overflow - so we need to skip beyond any SGRs + #There may be more than one, because although the stack leading up to overflow may have been merged - codes between the last column and the overflowing grapheme will remain separate + #We don't expect any movement or other ANSI codes - as if they came before the grapheme, they would have triggered a different instruction to 'overflow' + set idx 0 + set next_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set next_grapheme_index $idx + break + } + incr idx + } + assert {$next_grapheme_index >= 0} + #drop the overflow grapheme - keeping all codes in place. + set unapplied [join [lreplace $unapplied_list $next_grapheme_index $next_grapheme_index] ""] + #we need to run the reduced unapplied on the same line - further graphemes will just overflow again, but codes or control chars could trigger jumps to other lines + + set overflow_handled 1 + #handled by dropping overflow if any + } + } + overflow_splitchar { + set row $post_render_row ;#renderline will not advance row when reporting overflow char + + #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts + #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc + if {$autowrap_mode} { + if {$colwidth < 2} { + #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character + set idx 0 + set triggering_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set triggering_grapheme_index $idx + break + } + incr idx + } + set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] + } else { + set col $opt_startcolumn + incr row + } + } else { + set overflow_handled 1 + #handled by dropping entire overflow if any + if {$colwidth < 2} { + set idx 0 + set triggering_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set triggering_grapheme_index $idx + break + } + incr idx + } + set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] + } + } + + } + vt { + + #can vt add a line like a linefeed can? + set row $post_render_row + set col $post_render_col + } + default { + puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" + } + + } + + + if {!$opt_overflow && !$autowrap_mode} { + #not allowed to overflow column or wrap therefore we get overflow data to truncate + if {[tcl::dict::get $opts -ellipsis]} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + if {[tcl::string::trim [punk::ansi::stripansi $lostdata]] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + set overflow_handled 1 + } else { + #no wrap - no ellipsis - silently truncate + set overflow_handled 1 + } + } + + + + if {$renderedrow <= [llength $outputlines]} { + lset outputlines [expr {$renderedrow-1}] $rendered + } else { + if {$opt_appendlines} { + lappend outputlines $rendered + } else { + #? + lset outputlines [expr {$renderedrow-1}] $rendered + } + } + + if {!$overflow_handled} { + append nextprefix $overflow_right + } + + append nextprefix $unapplied + + if 0 { + if {$nextprefix ne ""} { + set nextoveridx [expr {$overidx+1}] + if {$nextoveridx >= [llength $inputchunks]} { + lappend inputchunks $nextprefix + } else { + #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] + set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] + } + } + } + + if {$nextprefix ne ""} { + set inputchunks [linsert $inputchunks 0 $nextprefix] + } + + + incr overidx + incr loop + if {$loop >= $looplimit} { + puts stderr "overtype::renderspace looplimit reached ($looplimit)" + lappend outputlines "[a+ red bold] - looplimit $looplimit reached[a]" + set Y [a+ yellow bold] + set RST [a] + set sep_header ----DEBUG----- + set debugmsg "" + append debugmsg "${Y}${sep_header}${RST}" \n + append debugmsg "looplimit $looplimit reached\n" + append debugmsg "test_mode:$test_mode\n" + append debugmsg "data_mode:$data_mode\n" + append debugmsg "opt_appendlines:$opt_appendlines\n" + append debugmsg "prev_row :[tcl::dict::get $LASTCALL -cursor_row]\n" + append debugmsg "prev_col :[tcl::dict::get $LASTCALL -cursor_column]\n" + tcl::dict::for {k v} $rinfo { + append debugmsg "${Y}$k [ansistring VIEW -lf 1 -vt 1 $v]$RST" \n + } + append debugmsg "${Y}[string repeat - [string length $sep_header]]$RST" \n + + puts stdout $debugmsg + #todo - config regarding error dumps rather than just dumping in working dir + set fd [open [pwd]/error_overtype.txt w] + puts $fd $debugmsg + close $fd + error $debugmsg + break + } + } + + set result [join $outputlines \n] + if {$info_mode} { + #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? + #append result \n$instruction_stats\n + } + return $result + } + + #todo - left-right ellipsis ? + proc centre {args} { + variable default_ellipsis_horizontal + if {[llength $args] < 2} { + error {usage: ?-transparent [0|1]? ?-bias [left|right]? ?-overflow [1|0]? undertext overtext} + } + + foreach {underblock overblock} [lrange $args end-1 end] break + + #todo - vertical vs horizontal overflow for blocks + set opts [tcl::dict::create\ + -bias left\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + ] + set argsflags [lrange $args 0 end-2] + foreach {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 { + tcl::dict::set opts $k $v + } + default { + set known_opts [tcl::dict::keys $opts] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + # -- --- --- --- --- --- + + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {$colwidth - $overblock_width}] + if {$under_exposed_max > 0} { + #background block is wider + if {$under_exposed_max % 2 == 0} { + #even left/right exposure + set left_exposed [expr {$under_exposed_max / 2}] + } else { + set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division + if {[tcl::string::tolower [tcl::dict::get $opts -bias]] eq "left"} { + set left_exposed $beforehalf + } else { + #bias to the right + set left_exposed [expr {$beforehalf + 1}] + } + } + } else { + set left_exposed 0 + } + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + set undertext "$undertext[string repeat { } $udiff]" + } + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + #review - right-to-left langs should elide on left! - extra option required + + if {$overflowlength > 0} { + #overlay line wider or equal + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow [tcl::dict::get $opts -overflow] -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + #todo - get replay_codes from overflow_right instead of wherever it was truncated? + + #overlay line data is wider - trim if overflow not specified in opts - and overtype an ellipsis at right if it was specified + if {![tcl::dict::get $opts -overflow]} { + #lappend outputlines [tcl::string::range $overtext 0 [expr {$colwidth - 1}]] + #set overtext [tcl::string::range $overtext 0 $colwidth-1 ] + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + #don't use tcl::string::range on ANSI data + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + } + } + lappend outputlines $rendered + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent $undertext $overtext] + } else { + #background block is wider than or equal to data for this line + #lappend outputlines [renderline -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rinfo [renderline -info 1 -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + return [join $outputlines \n] + } + + #overtype::right is for a rendered ragged underblock and a rendered ragged overblock + #ie we can determine the block width for bost by examining the lines and picking the longest. + # + proc right {args} { + #NOT the same as align-right - which should be done to the overblock first if required + variable default_ellipsis_horizontal + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? ?-transparent 0|? undertext overtext} + } + foreach {underblock overblock} [lrange $args end-1 end] break + + set opts [tcl::dict::create\ + -bias ignored\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -align "left"\ + ] + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -align { + tcl::dict::set opts $k $v + } + default { + set known_opts [tcl::dict::keys $opts] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + set opt_align [tcl::dict::get $opts -align] + # -- --- --- --- --- --- + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] + set left_exposed $under_exposed_max + + + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + #puts xxx + append undertext [string repeat { } $udiff] + } + if {$overtext_datalen < $overblock_width} { + set odiff [expr {$overblock_width - $overtext_datalen}] + switch -- $opt_align { + left { + set startoffset 0 + } + right { + set startoffset $odiff + } + default { + set half [expr {$odiff / 2}] + #set lhs [string repeat { } $half] + #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left + #set rhs [string repeat { } $righthalf] + set startoffset $half + } + } + } else { + set startoffset 0 ;#negative? + } + + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + if {$overflowlength > 0} { + #raw overtext wider than undertext column + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] + set replay_codes [tcl::dict::get $rinfo replay_codes] + set rendered [tcl::dict::get $rinfo result] + if {!$opt_overflow} { + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] + #todo - overflow on left if allign = right?? + set rendered [overtype::right $rendered $ellipsis] + } + } + } + lappend outputlines $rendered + } else { + #padded overtext + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] + #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes [tcl::dict::get $rinfo replay_codes] + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + + return [join $outputlines \n] + } + + proc left {args} { + overtype::block -blockalign left {*}$args + } + #overtype a (possibly ragged) underblock with a (possibly ragged) overblock + proc block {args} { + variable default_ellipsis_horizontal + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-blockalign left|centre|right? ?-textalign left|centre|right? ?-overflow [1|0]? ?-transparent 0|? undertext overtext} + } + #foreach {underblock overblock} [lrange $args end-1 end] break + lassign [lrange $args end-1 end] underblock overblock + + set opts [tcl::dict::create\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -textalign "left"\ + -textvertical "top"\ + -blockalign "left"\ + -blockalignbias left\ + -blockvertical "top"\ + ] + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -blockalignbias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -textalign - -blockalign - -blockvertical { + tcl::dict::set opts $k $v + } + default { + error "overtype::block unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + set opt_textalign [tcl::dict::get $opts -textalign] + set opt_blockalign [tcl::dict::get $opts -blockalign] + if {$opt_blockalign eq "center"} { + set opt_blockalign "centre" + } + # -- --- --- --- --- --- + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] + + switch -- $opt_blockalign { + left { + set left_exposed 0 + } + right { + set left_exposed $under_exposed_max + } + centre { + if {$under_exposed_max > 0} { + #background block is wider + if {$under_exposed_max % 2 == 0} { + #even left/right exposure + set left_exposed [expr {$under_exposed_max / 2}] + } else { + set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division + if {[tcl::string::tolower [tcl::dict::get $opts -blockalignbias]] eq "left"} { + set left_exposed $beforehalf + } else { + #bias to the right + set left_exposed [expr {$beforehalf + 1}] + } + } + } else { + set left_exposed 0 + } + } + default { + set left_exposed 0 + } + } + + + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + #puts xxx + append undertext [string repeat { } $udiff] + } + if {$overtext_datalen < $overblock_width} { + set odiff [expr {$overblock_width - $overtext_datalen}] + switch -- $opt_textalign { + left { + set startoffset 0 + } + right { + set startoffset $odiff + } + default { + set half [expr {$odiff / 2}] + #set lhs [string repeat { } $half] + #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left + #set rhs [string repeat { } $righthalf] + set startoffset $half + } + } + } else { + set startoffset 0 ;#negative? + } + + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + if {$overflowlength > 0} { + #raw overtext wider than undertext column + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] + set replay_codes [tcl::dict::get $rinfo replay_codes] + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + if {!$opt_overflow} { + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + #don't use tcl::string::range on ANSI data + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::block -blockalign right $rendered $opt_ellipsistext] + } + } + + #if {$opt_ellipsis} { + # set show_ellipsis 1 + # if {!$opt_ellipsiswhitespace} { + # #we don't want ellipsis if only whitespace was lost + # set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + # if {[tcl::string::trim $lostdata] eq ""} { + # set show_ellipsis 0 + # } + # } + # if {$show_ellipsis} { + # set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] + # #todo - overflow on left if allign = right?? + # set rendered [overtype::right $rendered $ellipsis] + # } + #} + } + lappend outputlines $rendered + } else { + #padded overtext + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] + #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes [tcl::dict::get $rinfo replay_codes] + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + + return [join $outputlines \n] + } + # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + # renderline written from a left-right line orientation perspective as a first-shot at getting something useful. + # ultimately right-to-left, top-to-bottom and bottom-to-top are probably needed. + # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + # + # + #-returnextra enables returning of overflow and length + #review - use punk::ansi::ta::detect to short-circuit processing and do simpler string calcs as an optimisation? + #review - DECSWL/DECDWL double width line codes - very difficult/impossible to align and compose with other elements + #(could render it by faking it with sixels and a lot of work - find/make a sixel font and ensure it's exactly 2 cols per char) + #todo - review transparency issues with single/double width characters + #bidi - need a base direction and concept of directional runs for RTL vs LTR - may be best handled at another layer? + proc renderline {args} { + #*** !doctools + #[call [fun overtype::renderline] [arg args] ] + #[para] renderline is the core engine for overtype string processing (frames & textblocks), and the raw mode commandline repl for the Tcl Punk Shell + #[para] It is also a central part of an ansi (micro) virtual terminal-emulator of sorts + #[para] This system does a half decent job at rendering 90's ANSI art to manipulable colour text blocks that can be joined & framed for layout display within a unix or windows terminal + #[para] Renderline helps maintain ANSI text styling reset/replay codes so that the styling of one block doesn't affect another. + #[para] Calling on the punk::ansi library - it can coalesce codes to keep the size down. + #[para] It is a giant mess of doing exactly what common wisdom says not to do... lots at once. + #[para] renderline is part of the Unicode and ANSI aware Overtype system which 'renders' a block of text onto a static underlay + #[para] The underlay is generally expected to be an ordered set of lines or a rectangular text block analogous to a terminal screen - but it can also be ragged in line length, or just blank. + #[para] The overlay couuld be similar - in which case it may often be used to overwrite a column or section of the underlay. + #[para] The overlay could however be a sequence of ANSI-laden text that jumps all over the place. + # + #[para] renderline itself only deals with a single line - or sometimes a single character. It is generally called from a loop that does further terminal-like or textblock processing. + #[para] By suppyling the -info 1 option - it can return various fields indicating the state of the render. + #[para] The main 3 are the result, overflow_right, and unapplied. + #[para] Renderline handles cursor movements from either keystrokes or ANSI sequences but for a full system the aforementioned loop will need to be in place to manage the set of lines under manipulation. + + if {[llength $args] < 2} { + error {usage: ?-info 0|1? ?-startcolumn ? ?-cursor_column ? ?-cursor_row |""? ?-transparent [0|1|]? ?-overflow [1|0]? undertext overtext} + } + lassign [lrange $args end-1 end] under over + if {[string first \n $under] >= 0} { + error "overtype::renderline not allowed to contain newlines in undertext" + } + #if {[string first \n $over] >=0 || [string first \n $under] >= 0} { + # error "overtype::renderline not allowed to contain newlines" + #} + + #generally faster to create a new dict in the proc than to use a namespace variable to create dict once and link to variable (2024 8.6/8.7) + set opts [tcl::dict::create\ + -etabs 0\ + -width \uFFEF\ + -overflow 0\ + -transparent 0\ + -startcolumn 1\ + -cursor_column 1\ + -cursor_row ""\ + -insert_mode 1\ + -autowrap_mode 1\ + -reverse_mode 0\ + -info 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -cursor_restore_attributes ""\ + -cp437 0\ + -experimental {}\ + ] + #-cursor_restore_attributes only - for replay stack - position and actual setting/restoring handled by throwback to caller + + #cursor_row, when numeric will allow detection of certain row moves that are still within our row - allowing us to avoid an early return + #An empty string for cursor_row tells us we have no info about our own row context, and to return with an unapplied string if any row move occurs + + #exposed1 and exposed2 for first and second col of underying 2wide char which is truncated by transparency or overflow + #todo - return info about such grapheme 'cuts' in -info structure and/or create option to raise an error + + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -experimental - -cp437 - -width - -overflow - -transparent - -startcolumn - -cursor_column - -cursor_row - -insert_mode - -autowrap_mode - -reverse_mode - -info - -exposed1 - -exposed2 - -cursor_restore_attributes { + tcl::dict::set opts $k $v + } + default { + error "overtype::renderline unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_etabs [tcl::dict::get $opts -etabs] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_colstart [tcl::dict::get $opts -startcolumn] ;#lhs limit for overlay - an offset to cursor_column - first visible column is 1. 0 or < 0 are before the start of the underlay + set opt_colcursor [tcl::dict::get $opts -cursor_column];#start cursor column relative to overlay + set opt_row_context [tcl::dict::get $opts -cursor_row] + if {[string length $opt_row_context]} { + if {![tcl::string::is integer -strict $opt_row_context] || $opt_row_context <1 } { + error "overtype::renderline -cursor_row must be empty for unspecified/unknown or a non-zero positive integer. received: '$opt_row_context'" + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + #The _mode flags correspond to terminal modes that can be set/reset via escape sequences (e.g DECAWM wraparound mode) + set opt_insert_mode [tcl::dict::get $opts -insert_mode];#should usually be 1 for each new line in editor mode but must be initialised to 1 externally (review) + #default is for overtype + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_autowrap_mode [tcl::dict::get $opts -autowrap_mode] ;#DECAWM - char or movement can go beyond leftmost/rightmost col to prev/next line + set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] ;#DECSNM + # -- --- --- --- --- --- --- --- --- --- --- --- + set temp_cursor_saved [tcl::dict::get $opts -cursor_restore_attributes] + + set test_mode 0 + set cp437_glyphs [tcl::dict::get $opts -cp437] + foreach e [tcl::dict::get $opts -experimental] { + switch -- $e { + test_mode { + set test_mode 1 + set cp437_glyphs 1 + } + } + } + set test_mode 1 ;#try to elminate + set cp437_map [tcl::dict::create] + if {$cp437_glyphs} { + set cp437_map [set ::punk::ansi::cp437_map] + #for cp437 images we need to map these *after* splitting ansi + #some old files might use newline for its glyph.. but we can't easily support that. + #Not sure how old files did it.. maybe cr lf in sequence was newline and any lone cr or lf were displayed as glyphs? + tcl::dict::unset cp437_map \n + } + + set opt_transparent [tcl::dict::get $opts -transparent] + if {$opt_transparent eq "0"} { + set do_transparency 0 + } else { + set do_transparency 1 + if {$opt_transparent eq "1"} { + set opt_transparent {[\s]} + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_returnextra [tcl::dict::get $opts -info] + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + # -- --- --- --- --- --- --- --- --- --- --- --- + + if {$opt_row_context eq ""} { + set cursor_row 1 + } else { + set cursor_row $opt_row_context + } + + + #----- + # + if {[info exists punk::console::tabwidth]} { + #punk console is updated if punk::console::set_tabstop_width is used or rep is started/restarted + #It is way too slow to test the current width by querying the terminal here - so it could conceivably get out of sync + #todo - we also need to handle the new threaded repl where console config is in a different thread. + # - also - concept of sub-regions being mini-consoles with their own settings - 'id' for console, or use in/out channels as id? + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + + set overdata $over + if {!$cp437_glyphs} { + #REVIEW! tabify will give different answers for an ANSI colourised string vs plain text + if {!$opt_etabs} { + if {[string first \t $under] >= 0} { + #set under [textutil::tabify::untabify2 $under] + set under [textutil::tabify::untabifyLine $under $tw] + } + if {[string first \t $over] >= 0} { + #set overdata [textutil::tabify::untabify2 $over] + set overdata [textutil::tabify::untabifyLine $over $tw] + } + } + } + #------- + + #ta_detect ansi and do simpler processing? + + #we repeat tests for grapheme width in different loops - rather than create another datastructure to store widths based on column, + #we'll use the grapheme_width_cached function as a lookup table of all graphemes encountered - as there will often be repeats in different positions anyway. + + # -- --- --- --- --- --- --- --- + if {$under ne ""} { + if {[punk::ansi::ta::detect $under]} { + set undermap [punk::ansi::ta::split_codes_single $under] + } else { + #single plaintext part + set undermap [list $under] + } + } else { + set undermap [list] + } + set understacks [list] + set understacks_gx [list] + + set i_u -1 ;#underlay may legitimately be empty + set undercols [list] + set u_codestack [list] + #u_gx_stack probably isn't really a stack - I don't know if g0 g1 can stack or not - for now we support only g0 anyway + set u_gx_stack [list] ;#separate stack for g0 (g1 g2 g3?) graphics on and off (DEC special graphics) + #set pt_underchars "" ;#for string_columns length calculation for overflow 0 truncation + set remainder [list] ;#for returnextra + foreach {pt code} $undermap { + #pt = plain text + #append pt_underchars $pt + if {$cp437_glyphs} { + set pt [tcl::string::map $cp437_map $pt] + } + foreach grapheme [punk::char::grapheme_split $pt] { + #an ugly hack to serve *some* common case ascii quickly with byte-compiled literal switch - feels dirty. + #.. but even 0.5uS per char (grapheme_width_cached) adds up quickly when stitching lots of lines together. + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + set width 1 + } + default { + if {$grapheme eq "\u0000"} { + #use null as empty cell representation - review + #use of this will probably collide with some application at some point + #consider an option to set the empty cell character + set width 1 + } else { + set width [grapheme_width_cached $grapheme] + #we still want most controls and other zero-length codepoints such as \u200d (zero width joiner) to stay zero-length + #we substitute lone ESC that weren't captured within ANSI context as a debugging aid to see malformed ANSI + #todo - default to off and add a flag (?) to enable this substitution + set sub_stray_escapes 0 + if {$sub_stray_escapes && $width == 0} { + if {$grapheme eq "\x1b"} { + set gvis [ansistring VIEW $grapheme] + set grapheme $gvis + set width 1 + } + } + } + } + } + #set width [grapheme_width_cached $grapheme] + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + + lappend undercols $grapheme + if {$width > 1} { + #presumably there are no triple-column or wider unicode chars.. until the aliens arrive.(?) + #but what about emoji combinations etc - can they be wider than 2? + #todo - if -etabs enabled - then we treat \t as the width determined by our elastic tabstop + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + lappend undercols "" + } + } + + #underlay should already have been rendered and not have non-sgr codes - but let's retain the check for them and not stack them if other codes are here + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + if {$code ne ""} { + set c1c2 [tcl::string::range $code 0 1] + + set leadernorm [tcl::string::range [tcl::string::map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\( 7GFX\ + ] $c1c2] 0 3];# leadernorm is 1st 2 chars mapped to normalised indicator - or is original 2 chars + + switch -- $leadernorm { + 7CSI - 8CSI { + #need to exclude certain leaders after the lb e.g < for SGR 1006 mouse + #REVIEW - what else could end in m but be mistaken as a normal SGR code here? + set maybemouse "" + if {[tcl::string::index $c1c2 0] eq "\x1b"} { + set maybemouse [tcl::string::index $code 2] + } + + if {$maybemouse ne "<" && [tcl::string::index $code end] eq "m"} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set u_codestack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set u_codestack [list $code] + } else { + #basic simplification first.. straight dups + set dup_posns [lsearch -all -exact $u_codestack $code] ;#-exact because of square-bracket glob chars + set u_codestack [lremove $u_codestack {*}$dup_posns] + lappend u_codestack $code + } + } + } + 7GFX { + switch -- [tcl::string::index $code 2] { + "0" { + set u_gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess + } + B { + set u_gx_stack [list] + } + } + } + default { + + } + + } + + #if {[punk::ansi::codetype::is_sgr_reset $code]} { + # #set u_codestack [list] + #} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + #} elseif {[punk::ansi::codetype::is_sgr $code]} { + #} else { + # #leave SGR stack as is + # if {[punk::ansi::codetype::is_gx_open $code]} { + # } elseif {[punk::ansi::codetype::is_gx_close $code]} { + # } + #} + } + #consider also if there are other codes that should be stacked..? + } + + if {!$test_mode} { + #fill columns to width with spaces, and carry over stacks - we will have to keep track of where the underlying data ends manually - TODO + #Specifying a width is suitable for terminal-like applications and text-blocks + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff " "] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + } else { + #NULL empty cell indicator + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff "\u0000"] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + + } + if {$opt_width ne "\uFFEF"} { + set colwidth $opt_width + } else { + set colwidth [llength $undercols] + } + + + if 0 { + # ----------------- + # if we aren't extending understacks & understacks_gx each time we incr idx above the undercols length.. this doesn't really serve a purpose + # Review. + # ----------------- + #replay code for last overlay position in input line + # whether or not we get that far - we need to return it for possible replay on next line + if {[llength $understacks]} { + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + } else { + #in case overlay onto emptystring as underlay + lappend understacks [list] + lappend understacks_gx [list] + } + # ----------------- + } + + #trailing codes in effect for underlay + if {[llength $u_codestack]} { + #set replay_codes_underlay [join $u_codestack ""] + set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] + } else { + set replay_codes_underlay "" + } + + + # -- --- --- --- --- --- --- --- + #### + #if opt_colstart - we need to build a space (or any singlewidth char ?) padding on the left of the right number of columns. + #this will be processed as transparent - and handle doublewidth underlay characters appropriately + set startpad_overlay [string repeat " " [expr {$opt_colstart -1}]] + append startpad_overlay $overdata ;#overdata with left padding spaces based on col-start under will show through for left-padding portion regardless of -transparency + if {$startpad_overlay ne ""} { + if {[punk::ansi::ta::detect $startpad_overlay]} { + set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] + } else { + #single plaintext part + set overmap [list $startpad_overlay] + } + } else { + set overmap [list] + } + #set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] + #### + + #??? + set colcursor $opt_colstart + #TODO - make a little virtual column object + #we need to refer to column1 or columnmin? or columnmax without calculating offsets due to to startcolumn + #need to lock-down what start column means from perspective of ANSI codes moving around - the offset perspective is unclear and a mess. + + + #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + #as at 2024-02 punk::char::grapheme_split uses these - not aware of more complex graphemes + + set overstacks [list] + set overstacks_gx [list] + + set o_codestack [list]; #SGR codestack (not other codes such as movement,insert key etc) + set o_gxstack [list] + set pt_overchars "" + set i_o 0 + set overlay_grapheme_control_list [list] ;#tag each with g, sgr or other. 'other' are things like cursor-movement or insert-mode or codes we don't recognise/use + #experiment + set overlay_grapheme_control_stacks [list] + foreach {pt code} $overmap { + + #todo - wrap in test for empty pt (we used split_codes_single - and it may be common for sgr sequences to be unmerged and so have empty pts between) + if {$cp437_glyphs} { + set pt [tcl::string::map $cp437_map $pt] + } + append pt_overchars $pt + #will get empty pt between adjacent codes + foreach grapheme [punk::char::grapheme_split $pt] { + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + incr i_o + lappend overlay_grapheme_control_list [list g $grapheme] + lappend overlay_grapheme_control_stacks $o_codestack + } + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + #order of if-else based on assumptions: + # that pure resets are fairly common - more so than leading resets with other info + # that non-sgr codes are not that common, so ok to check for resets before verifying it is actually SGR at all. + if {$code ne ""} { + lappend overlay_grapheme_control_stacks $o_codestack + #there will always be an empty code at end due to foreach on 2 vars with odd-sized list ending with pt (overmap coming from perlish split) + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set o_codestack [list "\x1b\[m"] ;#reset better than empty list - fixes some ansi art issues + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set o_codestack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $o_codestack $code] ;#must be -exact because of square-bracket glob chars + set o_codestack [lremove $o_codestack {*}$dup_posns] + lappend o_codestack $code + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[regexp {\x1b7|\x1b\[s} $code]} { + #experiment + #cursor_save - for the replays review. + #jmn + #set temp_cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + lappend overlay_grapheme_control_list [list other $code] + } elseif {[regexp {\x1b8|\x1b\[u} $code]} { + #experiment + #cursor_restore - for the replays + set o_codestack [list $temp_cursor_saved] + lappend overlay_grapheme_control_list [list other $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set o_gxstack [list "gx0_on"] + lappend overlay_grapheme_control_list [list gx0 gx0_on] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set o_gxstack [list] + lappend overlay_grapheme_control_list [list gx0 gx0_off] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend overlay_grapheme_control_list [list other $code] + } + } + } + + } + #replay code for last overlay position in input line - should take account of possible trailing sgr code after last grapheme + set max_overlay_grapheme_index [expr {$i_o -1}] + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + + #set replay_codes_overlay [join $o_codestack ""] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + + #if {[tcl::dict::exists $overstacks $max_overlay_grapheme_index]} { + # set replay_codes_overlay [join [tcl::dict::get $overstacks $max_overlay_grapheme_index] ""] + #} else { + # set replay_codes_overlay "" + #} + # -- --- --- --- --- --- --- --- + + + #potential problem - combinining diacritics directly following control chars like \r \b + + # -- --- --- + #we need to initialise overflow_idx before any potential row-movements - as they need to perform a loop break and force in_excess to 1 + if {$opt_overflow} { + #somewhat counterintuitively - overflow true means we can have lines as long as we want, but either way there can be excess data that needs to be thrown back to the calling loop. + set overflow_idx -1 + } else { + #overflow zero - we can't grow beyond our column width - so we get ellipsis or truncation + if {$opt_width ne "\uFFEF"} { + set overflow_idx [expr {$opt_width}] + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + } + # -- --- --- + + set outcols $undercols ;#leave undercols as is, outcols can potentially be appended to. + + set unapplied "" ;#if we break for move row (but not for /v ?) + set unapplied_list [list] + + set insert_lines_above 0 ;#return key + set insert_lines_below 0 + set instruction "" + + # -- --- --- + #cursor_save_dec, cursor_restore_dec etc + set cursor_restore_required 0 + set cursor_saved_attributes "" + set cursor_saved_position "" + # -- --- --- + + #set idx 0 ;# line index (cursor - 1) + #set idx [expr {$opt_colstart + $opt_colcursor} -1] + + #idx is the per column output index + set idx [expr {$opt_colcursor -1}] ;#don't use opt_colstart here - we have padded and won't start emitting until idx reaches opt_colstart-1 + #cursor_column is usually one above idx - but we have opt_colstart which is like a margin - todo: remove cursor_column from the following loop and calculate it's offset when breaking or at end. + #(for now we are incrementing/decrementing both in sync - which is a bit silly) + set cursor_column $opt_colcursor + + #idx_over is the per grapheme overlay index + set idx_over -1 + + + #movements only occur within the overlay range. + #an underlay is however not necessary.. e.g + #renderline -overflow 1 "" data + #foreach {pt code} $overmap {} + set insert_mode $opt_insert_mode ;#default 1 + set autowrap_mode $opt_autowrap_mode ;#default 1 + + #set re_mode {\x1b\[\?([0-9]*)(h|l)} ;#e.g DECAWM + #set re_col_move {\x1b\[([0-9]*)(C|D|G)$} + #set re_row_move {\x1b\[([0-9]*)(A|B)$} + #set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} ;# or "f" ? + #set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} + #set re_cursor_save {\x1b\[s$} ;#note probable incompatibility with DECSLRM (set left right margin)! + #set re_cursor_restore {\x1b\[u$} + #set re_cursor_save_dec {\x1b7$} + #set re_cursor_restore_dec {\x1b8$} + #set re_other_single {\x1b(D|M|E)$} + #set re_decstbm {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)r$} ;#DECSTBM set top and bottom margins + + #puts "-->$overlay_grapheme_control_list<--" + #puts "-->overflow_idx: $overflow_idx" + for {set gci 0} {$gci < [llength $overlay_grapheme_control_list]} {incr gci} { + set gc [lindex $overlay_grapheme_control_list $gci] + lassign $gc type item + + #emit plaintext chars first using existing SGR codes from under/over stack as appropriate + #then check if the following code is a cursor movement within the line and adjust index if so + #foreach ch $overlay_graphemes {} + switch -- $type { + g { + set ch $item + incr idx_over; #idx_over (until unapplied reached anyway) is per *grapheme* in the overlay - not per col. + if {($idx < ($opt_colstart -1))} { + incr idx [grapheme_width_cached $ch] + continue + } + #set within_undercols [expr {$idx <= [llength $undercols]-1}] ;#within our active data width + set within_undercols [expr {$idx <= $colwidth-1}] + + #https://www.enigma.com/resources/blog/the-secret-world-of-newline-characters + #\x85 NEL in the c1 control set is treated by some terminal emulators (e.g Hyper) as a newline, + #on some it's invisble but doesn't change the line, on some it's a visible glyph of width 1. + #This is hard to process in any standard manner - but I think the Hyper behaviour of doing what it was intended is perhaps most reasonable + #We will map it to the same behaviour as lf here for now... but we need also to consider the equivalent ANSI sequence: \x1bE + + set chtest [tcl::string::map [list \n \x85 \b \r \v \x7f ] $ch] + #puts --->chtest:$chtest + #specials - each shoud have it's own test of what to do if it happens after overflow_idx reached + switch -- $chtest { + "" { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + if {$idx == 0} { + #puts "---a at col 1" + #linefeed at column 1 + #leave the overflow_idx ;#? review + set instruction lf_start ;#specific instruction for newline at column 1 + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } elseif {$overflow_idx != -1 && $idx == $overflow_idx} { + #linefeed after final column + #puts "---c at overflow_idx=$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_overflow ;#only special treatment is to give it it's own instruction in case caller needs to handle differently + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + #linefeed occurred in middle or at end of text + #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } + + } + "" { + #will we want/need to use raw for keypresses in terminal? (terminal with LNM in standard reset mode means enter= this is the usual config for terminals) + #So far we are assuming the caller has translated to and handle above.. REVIEW. + + #consider also the old space-carriagereturn softwrap convention used in some terminals. + #In the context of rendering to a block of text - this works similarly in that the space gets eaten so programs emitting space-cr at the terminal width col will pretty much get what they expect. + set idx [expr {$opt_colstart -1}] + set cursor_column $opt_colstart ;#? + } + "" { + #literal backspace char - not necessarily from keyboard + #review - backspace effect on double-width chars - we are taking a column-editing perspective in overtype + #(important for -transparent option - hence replacement chars for half-exposed etc) + #review - overstrike support as per nroff/less (generally considered an old technology replaced by unicode mechanisms and/or ansi SGR) + if {$idx > ($opt_colstart -1)} { + incr idx -1 + incr cursor_column -1 + } else { + set flag 0 + if $flag { + #review - conflicting requirements? Need a different sequence for destructive interactive backspace? + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction backspace_at_start + break + } + } + } + "" { + #literal del character - some terminals send just this for what is generally expected to be a destructive backspace + #We instead treat this as a pure delete at current cursor position - it is up to the repl or terminal to remap backspace key to a sequence that has the desired effect. + priv::render_delchar $idx + } + "" { + #end processing this overline. rest of line is remainder. cursor for column as is. + #REVIEW - this theoretically depends on terminal's vertical tabulation setting (name?) + #e.g it could be configured to jump down 6 rows. + #On the other hand I've seen indications that some modern terminal emulators treat it pretty much as a linefeed. + #todo? + incr cursor_row + set overflow_idx $idx + #idx_over has already been incremented as this is both a movement-control and in some sense a grapheme + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction vt + break + } + default { + if {$overflow_idx != -1} { + #review - how to check arbitrary length item such as tab is going to overflow .. before we get to overflow_idx? + #call grapheme_width_cached on each ch, or look for tab specifically as it's currently the only known reason to have a grapheme width > 2? + #we need to decide what a tab spanning the overflow_idx means and how it affects wrap etc etc + if {$idx == $overflow_idx-1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 2} { + #review split 2w overflow? + #we don't want to make the decision here to split a 2w into replacement characters at end of line and beginning of next line + #better to consider the overlay char as unable to be applied to the line + #render empty column(?) - and reduce overlay grapheme index by one so that the current ch goes into unapplied + #throwing back to caller with instruction complicates its job - but is necessary to avoid making decsions for it here. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #change the overflow_idx + set overflow_idx $idx + incr idx + incr idx_over -1 ;#set overlay grapheme index back one so that sgr stack from previous overlay grapheme used + priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#note $gci-1 instead of just gci + #throw back to caller's loop - add instruction to caller as this is not the usual case + #caller may for example choose to render a single replacement char to this line and omit the grapheme, or wrap it to the next line + set instruction overflow_splitchar + break + } elseif {$owidth > 2} { + #? tab? + #TODO! + puts stderr "overtype::renderline long overtext grapheme '[ansistring VIEW -lf 1 -vt 1 $ch]' not handled" + #tab of some length dependent on tabstops/elastic tabstop settings? + } + } elseif {$idx >= $overflow_idx} { + #jmn? + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci-1]] + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #don't incr idx beyond the overflow_idx + #idx_over already incremented - decrement so current overlay grapheme stacks go to unapplied + incr idx_over -1 + #priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#back one index here too + priv::render_this_unapplied $overlay_grapheme_control_list $gci ;# + set instruction overflow + break + } + } else { + #review. + #This corresponds to opt_overflow being true (at least until overflow_idx is in some cases forced to a value when throwing back to calling loop) + } + + if {($do_transparency && [regexp $opt_transparent $ch])} { + #pre opt_colstart is effectively transparent (we have applied padding of required number of columns to left of overlay) + if {$idx > [llength $outcols]-1} { + lappend outcols " " + #tcl::dict::set understacks $idx [list] ;#review - use idx-1 codestack? + lset understacks $idx [list] + incr idx + incr cursor_column + } else { + #todo - punk::char::char_width + set g [lindex $outcols $idx] + set uwidth [grapheme_width_cached $g] + if {[lindex $outcols $idx] eq ""} { + #2nd col of 2-wide char in underlay + incr idx + incr cursor_column + } elseif {$uwidth == 0} { + #e.g control char ? combining diacritic ? + incr idx + incr cursor_column + } elseif {$uwidth == 1} { + set owidth [grapheme_width_cached $ch] + incr idx + incr cursor_column + if {$owidth > 1} { + incr idx + incr cursor_column + } + } elseif {$uwidth > 1} { + if {[grapheme_width_cached $ch] == 1} { + if {!$insert_mode} { + #normal singlewide transparent overlay onto double-wide underlay + set next_pt_overchar [tcl::string::index $pt_overchars $idx_over+1] ;#lookahead of next plain-text char in overlay + if {$next_pt_overchar eq ""} { + #special-case trailing transparent - no next_pt_overchar + incr idx + incr cursor_column + } else { + if {[regexp $opt_transparent $next_pt_overchar]} { + incr idx + incr cursor_column + } else { + #next overlay char is not transparent.. first-half of underlying 2wide char is exposed + #priv::render_addchar $idx $opt_exposed1 [tcl::dict::get $overstacks $idx_over] [tcl::dict::get $overstacks_gx $idx_over] $insert_mode + priv::render_addchar $idx $opt_exposed1 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } + } else { + #? todo - decide what transparency even means for insert mode + incr idx + incr cursor_column + } + } else { + #2wide transparency over 2wide in underlay - review + incr idx + incr cursor_column + } + } + } + } else { + + set idxchar [lindex $outcols $idx] + #non-transparent char in overlay or empty cell + if {$idxchar eq "\u0000"} { + #empty/erased cell indicator + set uwidth 1 + } else { + set uwidth [grapheme_width_cached $idxchar] + } + if {$within_undercols} { + if {$idxchar eq ""} { + #2nd col of 2wide char in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 + #JMN - this has to expose if our startposn chopped an underlay - but not if we already overwrote the first half of the widechar underlay grapheme + #e.g renderline \uFF21\uFF21--- a\uFF23\uFF23 + #vs + # renderline -startcolumn 2 \uFF21---- \uFF23 + if {[lindex $outcols $idx-1] != ""} { + #verified it's an empty following a filled - so it's a legit underlay remnant (REVIEW - when would it not be??) + #reset previous to an exposed 1st-half - but leave understacks code as is + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 + } + incr idx + } else { + set prevcolinfo [lindex $outcols $idx-1] + #for insert mode - first replace the empty 2ndhalf char with exposed2 before shifting it right + #REVIEW - this leaves a replacement character permanently in our columns.. but it is consistent regarding length (?) + #The alternative is to disallow insertion at a column cursor that is at 2nd half of 2wide char + #perhaps by inserting after the char - this may be worthwhile - but may cause other surprises + #It is perhaps best avoided at another level and try to make renderline do exactly as it's told + #the advantage of this 2w splitting method is that the inserted character ends up in exactly the column we expect. + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 ;#replace not insert + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 1 ;#insert - same index + if {$prevcolinfo ne ""} { + #we've split the 2wide - it may already have been rendered as an exposed1 - but not for example if our startcolumn was current idx + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 ;#replace not insert + } ;# else?? + incr idx + } + if {$cursor_column < [llength $outcols] || $overflow_idx == -1} { + incr cursor_column + } + } elseif {$uwidth == 0} { + #what if this is some other c0/c1 control we haven't handled specifically? + + #by emitting a preceding empty-string column - we associate whatever this char is with the preceeding non-zero-length character and any existing zero-lengths that follow it + #e.g combining diacritic - increment before over char REVIEW + #arguably the previous overchar should have done this - ie lookahead for combiners? + #if we can get a proper grapheme_split function - this should be easier to tidy up. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column 2 + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } elseif {$uwidth == 1} { + #includes null empty cells + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #if next column in underlay empty - we've overwritten first half of underlying 2wide grapheme + #replace with rhs exposure in case there are no more overlay graphemes coming - use underlay's stack + if {([llength $outcols] >= $idx +2) && [lindex $outcols $idx+1] eq ""} { + priv::render_addchar [expr {$idx+1}] $opt_exposed2 [lindex $understacks $idx+1] [lindex $understacks_gx $idx+1] $insert_mode + } + incr idx + } + if {($cursor_column < [llength $outcols]) || $overflow_idx == -1 || $test_mode} { + incr cursor_column + } + } elseif {$uwidth > 1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + #1wide over 2wide in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #don't incr idx - we are just putting a broken-indication in the underlay - which may get overwritten by next overlay char + } else { + #insert mode just pushes all to right - no exposition char here + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } else { + #2wide over 2wide + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx 2 + incr cursor_column 2 + } + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + if {$overflow_idx !=-1 && !$test_mode} { + #overflow + if {$cursor_column > [llength $outcols]} { + set cursor_column [llength $outcols] + } + } + } + } + } + } ;# end switch + + + } + other { + #todo - consider CSI s DECSLRM vs ansi.sys \x1b\[s - we need \x1b\[s for oldschool ansi art - but may have to enable only for that. + #we should probably therefore reverse this mapping so that x1b7 x1b8 are the primary codes for save/restore + set code [tcl::string::map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $item] + #since this element isn't a grapheme - advance idx_over to next grapheme overlay when about to fill 'unapplied' + + set matchinfo [list] + + #remap of DEC cursor_save/cursor_restore from ESC sequence to equivalent CSI + #probably not ideal - consider putting cursor_save/cursor_restore in functions so they can be called from the appropriate switch branch instead of using this mapping + #review - cost/benefit of function calls within these switch-arms instead of inline code? + + set c1 [tcl::string::index $code 0] + set c1c2c3 [tcl::string::range $code 0 2] + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + #tcl 8.7 - faster to use inline list than to store it in a local var outside of loop. + #(surprising - but presumably ) + set leadernorm [tcl::string::range [tcl::string::map [list\ + \x1b\[< 1006\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\] 7OSC\ + \x9d 8OSC\ + \x1b 7ESC\ + ] $c1c2c3] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + #we leave the tail of the code unmapped for now + switch -- $leadernorm { + 1006 { + #https://invisible-island.net/xterm/ctlseqs/ctlseqs.html + #SGR (1006) CSI < followed by colon separated encoded-button-value,px,py ordinates and final M for button press m for button release + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 3 end]] + } + 7CSI - 7OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 2 end]] + } + 7ESC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + 8CSI - 8OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + default { + #we haven't made a mapping for this + set codenorm $code + } + } + + #we've mapped 7 and 8bit escapes to values we can handle as literals in switch statements to take advantange of jump tables. + switch -- $leadernorm { + 1006 { + #TODO + # + switch -- [tcl::string::index $codenorm end] { + M { + puts stderr "mousedown $codenorm" + } + m { + puts stderr "mouseup $codenorm" + } + } + + } + {7CSI} - {8CSI} { + set param [tcl::string::range $codenorm 4 end-1] + #puts stdout "--> CSI [tcl::string::index $leadernorm 0] bit param:$param" + switch -- [tcl::string::index $codenorm end] { + D { + #Col move + #puts stdout "<-back" + #cursor back + #left-arrow/move-back when ltr mode + set num $param + if {$num eq ""} {set num 1} + + set version 2 + if {$version eq "2"} { + #todo - startcolumn offset! + if {$cursor_column - $num >= 1} { + incr idx -$num + incr cursor_column -$num + } else { + if {!$autowrap_mode} { + set cursor_column 1 + set idx 0 + } else { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + incr cursor_column -$num + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmovebackward + break + } + } + } else { + incr idx -$num + incr cursor_column -$num + if {$idx < $opt_colstart-1} { + #wrap to previous line and position cursor at end of data + set idx [expr {$opt_colstart-1}] + set cursor_column $opt_colstart + } + } + } + C { + #Col move + #puts stdout "->forward" + #todo - consider right-to-left cursor mode (e.g Hebrew).. some day. + #cursor forward + #right-arrow/move forward + set num $param + if {$num eq ""} {set num 1} + + #todo - retrict to moving 1 position past datalen? restrict to column width? + #should ideally wrap to next line when interactive and not on last row + #(some ansi art seems to expect this behaviour) + #This presumably depends on the terminal's wrap mode + #e.g DECAWM autowrap mode + # CSI ? 7 h - set: autowrap (also tput smam) + # CSI ? 7 l - reset: no autowrap (also tput rmam) + set version 2 + if {$version eq "2"} { + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$test_mode && $cursor_column == $max+1} { + #move_forward while in overflow + incr cursor_column -1 + } + + if {($cursor_column + $num) <= $max} { + incr idx $num + incr cursor_column $num + } else { + if {$autowrap_mode} { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #jmn + if {$idx == $overflow_idx} { + incr num + } + + #horizontal movement beyond line extent needs to wrap - throw back to caller + #we may have both overflow_rightand unapplied data + #(can have overflow_right if we were in insert_mode and processed chars prior to this movement) + #leave row as is - caller will need to determine how many rows the column-movement has consumed + incr cursor_column $num ;#give our caller the necessary info as columns from start of row + #incr idx_over + #should be gci following last one applied + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmoveforward + break + } else { + set cursor_column $max + set idx [expr {$cursor_column -1}] + } + } + } else { + if {!$opt_overflow || ($cursor_column + $num) <= [llength $outcols+1]} { + incr idx $num + incr cursor_column $num + } else { + if {!$insert_mode} { + #block editing style with arrow keys + #overtype mode + set idxstart $idx + set idxend [llength $outcols] + set moveend [expr {$idxend - $idxstart}] + if {$moveend < 0} {set moveend 0} ;#sanity? + #puts "idxstart:$idxstart idxend:$idxend outcols[llength $outcols] undercols:[llength $undercols]" + incr idx $moveend + incr cursor_column $moveend + #if {[tcl::dict::exists $understacks $idx]} { + # set stackinfo [tcl::dict::get $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + #} else { + # set stackinfo [list] + #} + if {$idx < [llength $understacks]} { + set stackinfo [lindex $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + } else { + set stackinfo [list] + } + if {$idx < [llength $understacks_gx]} { + #set gxstackinfo [tcl::dict::get $understacks_gx $idx] + set gxstackinfo [lindex $understacks_gx $idx] + } else { + set gxstackinfo [list] + } + #pad outcols + set movemore [expr {$num - $moveend}] + #assert movemore always at least 1 or we wouldn't be in this branch + for {set m 1} {$m <= $movemore} {incr m} { + incr idx + incr cursor_column + priv::render_addchar $idx " " $stackinfo $gxstackinfo $insert_mode + } + } else { + #normal - insert + incr idx $num + incr cursor_column $num + if {$idx > [llength $outcols]} { + set idx [llength $outcols];#allow one beyond - for adding character at end of line + set cursor_column [expr {[llength $outcols]+1}] + } + } + } + } + } + G { + #Col move + #move absolute column + #adjust to colstart - as column 1 is within overlay + #??? + set idx [expr {$param + $opt_colstart -1}] + set cursor_column $param + error "renderline absolute col move ESC G unimplemented" + } + A { + #Row move - up + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set num $param + if {$num eq ""} {set num 1} + incr cursor_row -$num + + if {$cursor_row < 1} { + set cursor_row 1 + } + + #ensure rest of *overlay* is emitted to remainder + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up + #retain cursor_column + break + } + B { + #Row move - down + set num $param + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move down + if {$num eq ""} {set num 1} + incr cursor_row $num + + + incr idx_over ;#idx_over hasn't encountered a grapheme and hasn't advanced yet + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + H - f { + #$re_both_move + lassign [split $param {;}] row col + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #lassign $matchinfo _match row col + + if {$col eq ""} {set col 1} + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$col > $max} { + set cursor_column $max + } else { + set cursor_column $col + } + set idx [expr {$cursor_column -1}] + + if {$row eq ""} {set row 1} + set cursor_row $row + if {$cursor_row < 1} { + set cursor_row 1 + } + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move + break + + } + X { + puts stderr "X - $param" + #ECH - erase character + if {$param eq "" || $param eq "0"} {set param 1}; #param=count of chars to erase + priv::render_erasechar $idx $param + #cursor position doesn't change. + } + r { + #$re_decstbm + #https://www.vt100.net/docs/vt510-rm/DECSTBM.html + #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins + lassign [split $param {;}] margin_top margin_bottom + + #todo - return these for the caller to process.. + puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + set cursor_column 1 + set cursor_row 1 + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move ;#own instruction? decstbm? + break + } + s { + # - todo - make ansi.sys CSI s cursor save only apply for certain cases? - may need to support DECSLRM instead which uses same code + + #$re_cursor_save + #cursor save could come after last column + if {$overflow_idx != -1 && $idx == $overflow_idx} { + #bartman2.ans test file - fixes misalignment at bottom of dialog bubble + #incr cursor_row + #set cursor_column 1 + #bwings1.ans test file - breaks if we actually incr cursor (has repeated saves) + set cursor_saved_position [list row [expr {$cursor_row+1}] column 1] + } else { + set cursor_saved_position [list row $cursor_row column $cursor_column] + } + #there may be overlay stackable codes emitted that aren't in the understacks because they come between the last emmited character and the cursor_save control. + #we need the SGR and gx overlay codes prior to the cursor_save + + #a real terminal would not be able to know the state of the underlay.. so we should probably ignore it. + #set sgr_stack [lindex $understacks $idx] + #set gx_stack [lindex $understacks_gx $idx] ;#not actually a stack - just a boolean state (for now?) + + set sgr_stack [list] + set gx_stack [list] + + #we shouldn't need to scan for intermediate cursor save/restores - as restores would throw-back to the calling loop - so our overlay 'line' is since those. + #The overlay_grapheme_control_list had leading resets from previous lines - so we go back to the beginning not just the first grapheme. + + foreach gc [lrange $overlay_grapheme_control_list 0 $gci-1] { + lassign $gc type code + #types g other sgr gx0 + switch -- $type { + gx0 { + #code is actually a stand-in for the graphics on/off code - not the raw code + #It is either gx0_on or gx0_off + set gx_stack [list $code] + } + sgr { + #code is the raw code + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #jmn + set sgr_stack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set sgr_stack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #often we don't get resets - and codes just pile up. + #as a first step to simplifying - at least remove earlier straight up dupes + set dup_posns [lsearch -all -exact $sgr_stack $code] ;#needs -exact - codes have square-brackets (glob chars) + set sgr_stack [lremove $sgr_stack {*}$dup_posns] + lappend sgr_stack $code + } + } + } + } + set cursor_saved_attributes "" + switch -- [lindex $gx_stack 0] { + gx0_on { + append cursor_saved_attributes "\x1b(0" + } + gx0_off { + append cursor_saved_attributes "\x1b(B" + } + } + #append cursor_saved_attributes [join $sgr_stack ""] + append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] + + #as there is apparently only one cursor storage element we don't need to throw back to the calling loop for a save. + + #don't incr index - or the save will cause cursor to move to the right + #carry on + + } + u { + #$re_cursor_restore + #we are going to jump somewhere.. for now we will assume another line, and process accordingly. + #The caller has the cursor_saved_position/cursor_saved_attributes if any (?review - if we always pass it back it, we could save some calls for moves in same line) + #don't set overflow at this point. The existing underlay to the right must be preserved. + #we only want to jump and render the unapplied at the new location. + + #lset overstacks $idx_over [list] + #set replay_codes_overlay "" + + #if {$cursor_saved_attributes ne ""} { + # set replay_codes_overlay $cursor_saved_attributes ;#empty - or last save if it happend in this input chunk + #} else { + #jj + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay "" + #} + + #like priv::render_unapplied - but without the overlay's ansi reset or gx stacks from before the restore code + incr idx_over + + set unapplied "" + set unapplied_list [list] + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + #incr idx_over + } + set unapplied [join $unapplied_list ""] + #if the save occured within this line - that's ok - it's in the return value list and caller can prepend for the next loop. + set instruction restore_cursor + break + } + ~ { + #$re_vt_sequence + #lassign $matchinfo _match key mod + lassign [split $param {;}] key mod + + #Note that f1 to f4 show as ESCOP|Q|R|S (VT220?) but f5+ show as ESC\[15~ + # + #e.g esc \[2~ insert esc \[2;2~ shift-insert + #mod - subtract 1, and then use bitmask + #shift = 1, (left)Alt = 2, control=4, meta=8 (meta seems to do nothing on many terminals on windows? Intercepted by windows?) + #puts stderr "vt key:$key mod:$mod code:[ansistring VIEW $code]" + if {$key eq "1"} { + #home + } elseif {$key eq "2"} { + #Insert + if {$mod eq ""} { + #no modifier key + set insert_mode [expr {!$insert_mode}] + #rather than set the cursor - we return the insert mode state so the caller can decide + } + } elseif {$key eq "3"} { + #Delete - presumably this shifts other chars in the line, with empty cells coming in from the end + switch -- $mod { + "" { + priv::render_delchar $idx + } + "5" { + #ctrl-del - delete to end of word (pwsh) - possibly word on next line if current line empty(?) + } + } + } elseif {$key eq "4"} { + #End + } elseif {$key eq "5"} { + #pgup + } elseif {$key eq "6"} { + #pgDn + } elseif {$key eq "7"} { + #Home + #?? + set idx [expr {$opt_colstart -1}] + set cursor_column 1 + } elseif {$key eq "8"} { + #End + } elseif {$key eq "11"} { + #F1 - or ESCOP or e.g shift F1 ESC\[1;2P + } elseif {$key eq "12"} { + #F2 - or ESCOQ + } elseif {$key eq "13"} { + #F3 - or ESCOR + } elseif {$key eq "14"} { + #F4 - or ESCOS + } elseif {$key eq "15"} { + #F5 or shift F5 ESC\[15;2~ + } elseif {$key eq "17"} { + #F6 + } elseif {$key eq "18"} { + #F7 + } elseif {$key eq "19"} { + #F8 + } elseif {$key eq "20"} { + #F9 + } elseif {$key eq "21"} { + #F10 + } elseif {$key eq "23"} { + #F11 + } elseif {$key eq "24"} { + #F12 + } + + } + h - l { + #we are matching only last char to get to this arm - but are there other sequences ending in h|l we need to handle? + + #$re_mode if first after CSI is "?" + #some docs mention ESC=h|l - not seen on windows terminals.. review + #e.g https://www2.math.upenn.edu/~kazdan/210/computer/ansi.html + if {[tcl::string::index $codenorm 4] eq "?"} { + set num [tcl::string::range $codenorm 5 end-1] ;#param between ? and h|l + #lassign $matchinfo _match num type + switch -- $num { + 5 { + #DECSNM - reverse video + #How we simulate this to render within a block of text is an open question. + #track all SGR stacks and constantly flip based on the current SGR reverse state? + #It is the job of the calling loop to do this - so at this stage we'll just set the states + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set reverse_mode 1 + } else { + #reset (disable) + set reverse_mode 0 + } + + } + 7 { + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set autowrap_mode 1 + if {$opt_width ne "\uFFEF"} { + set overflow_idx $opt_width + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + #review - can idx ever be beyond overflow_idx limit when we change e.g with a width setting and cursor movements? presume not - but sanity check for now. + if {$idx >= $overflow_idx} { + puts stderr "renderline error - idx '$idx' >= overflow_idx '$overflow_idx' - unexpected" + } + } else { + #reset (disable) + set autowrap_mode 0 + set overflow_idx -1 + } + } + 25 { + if {$type eq "h"} { + #visible cursor + + } else { + #invisible cursor + + } + } + } + + } else { + puts stderr "overtype::renderline CSI...h|l code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + default { + puts stderr "overtype::renderline CSI code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + } + 7ESC { + #$re_other_single + switch -- [tcl::string::index $codenorm end] { + D { + #\x84 + #index (IND) + #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" + puts stderr "ESC D not fully implemented" + incr cursor_row + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + M { + #\x8D + #Reverse Index (RI) + #vt102-docs: "Moves cursor up one line in same column. If cursor is at top margin, screen performs a scroll-down" + puts stderr "ESC M not fully implemented" + + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move up + incr cursor_row -1 + if {$cursor_row < 1} { + set cursor_row 1 + } + #ensure rest of *overlay* is emitted to remainder + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up ;#need instruction for scroll-down? + #retain cursor_column + break + } + E { + #\x85 + #review - is behaviour different to lf? + #todo - possibly(?) same logic as handling above. i.e return instruction depends on where column_cursor is at the time we get NEL + #leave implementation until logic for is set in stone... still under review + #It's arguable NEL is a pure cursor movement as opposed to the semantic meaning of crlf or lf in a file. + # + #Next Line (NEL) "Move the cursor to the left margin on the next line. If the cursor is at the bottom margin, scroll the page up" + puts stderr "ESC E unimplemented" + + } + default { + puts stderr "overtype::renderline ESC code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + + } + } + + #switch -regexp -matchvar matchinfo -- $code\ + #$re_mode { + #}\ + #default { + # puts stderr "overtype::renderline code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + #} + + } + default { + #don't need to handle sgr or gx0 types + #we have our sgr gx0 codes already in stacks for each overlay grapheme + } + } + } + + #-------- + if {$opt_overflow == 0} { + #need to truncate to the width of the original undertext + #review - string_width vs printing_length here. undertext requirement to be already rendered therefore punk::char::string_width ok? + #set num_under_columns [punk::char::string_width $pt_underchars] ;#plaintext underchars + } + if {$overflow_idx == -1} { + #overflow was initially unlimited and hasn't been overridden + } else { + + } + #-------- + + + #coalesce and replay codestacks for outcols grapheme list + set outstring "" ;#output prior to overflow + set overflow_right "" ;#remainder after overflow point reached + set i 0 + set cstack [list] + set prevstack [list] + set prev_g0 [list] + #note overflow_idx may already have been set lower if we had a row move above due to \v or ANSI moves + set in_overflow 0 ;#used to stop char-width scanning once in overflow + if {$overflow_idx == 0} { + #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW + set in_overflow 1 + } + foreach ch $outcols { + #puts "---- [ansistring VIEW $ch]" + + set gxleader "" + if {$i < [llength $understacks_gx]} { + #set g0 [tcl::dict::get $understacks_gx $i] + set g0 [lindex $understacks_gx $i] + if {$g0 ne $prev_g0} { + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } else { + set gxleader "\x1b(B" + } + } + set prev_g0 $g0 + } else { + set prev_g0 [list] + } + + set sgrleader "" + if {$i < [llength $understacks]} { + #set cstack [tcl::dict::get $understacks $i] + set cstack [lindex $understacks $i] + if {$cstack ne $prevstack} { + if {[llength $prevstack] && ![llength $cstack]} { + #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. But why is cstack empty? + append sgrleader \033\[m + } else { + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + } + } + set prevstack $cstack + } else { + set prevstack [list] + } + + + + if {$in_overflow} { + if {$i == $overflow_idx} { + set 0 [lindex $understacks_gx $i] + set gxleader "" + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } elseif {$g0 eq [list "gx0_off"]} { + set gxleader "\x1b(B" + } + append overflow_right $gxleader + set cstack [lindex $understacks $i] + set sgrleader "" + #whether cstack is same or differs from previous char's stack - we must have an output at the start of the overflow_right + #if {[llength $prevstack] && ![llength $cstack]} { + # append sgrleader \033\[m + #} + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + append overflow_right $sgrleader + append overflow_right $ch + } else { + append overflow_right $gxleader + append overflow_right $sgrleader + append overflow_right $ch + } + } else { + if {$overflow_idx != -1 && $i+1 == $overflow_idx} { + #one before overflow + #will be in overflow in next iteration + set in_overflow 1 + if {[grapheme_width_cached $ch]> 1} { + #we overflowed with second-half of a double-width char - replace first-half with user-supplied exposition char (should be 1 wide) + set ch $opt_exposed1 + } + } + append outstring $gxleader + append outstring $sgrleader + if {$idx+1 < $cursor_column} { + append outstring [tcl::string::map {\u0000 " "} $ch] + } else { + append outstring $ch + } + } + incr i + } + #flower.ans good test for null handling - reverse line building + if {![ansistring length $overflow_right]} { + set outstring [tcl::string::trimright $outstring "\u0000"] + } + set outstring [tcl::string::map {\u0000 " "} $outstring] + set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + + set replay_codes "" + if {[llength $understacks] > 0} { + if {$overflow_idx == -1} { + #set tail_idx [tcl::dict::size $understacks] + set tail_idx [llength $understacks] + } else { + set tail_idx [llength $undercols] + } + if {$tail_idx-1 < [llength $understacks]} { + #set replay_codes [join [lindex $understacks $tail_idx-1] ""] ;#tail replay codes + set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes + } + if {$tail_idx-1 < [llength $understacks_gx]} { + set gx0 [lindex $understacks_gx $tail_idx-1] + if {$gx0 eq [list "gx0_on"]} { + #if it was on, turn gx0 off at the point we stop processing overlay + append outstring "\x1b(B" + } + } + } + if {[string length $overflow_right]} { + #puts stderr "remainder:$overflow_right" + } + #pdict $understacks + + if {[punk::ansi::ta::detect_sgr $outstring]} { + append outstring [punk::ansi::a] ;#without this - we would get for example, trailing backgrounds after rightmost column + + #close off any open gx? + #probably should - and overflow_right reopen? + } + + if {$opt_returnextra} { + #replay_codes is the codestack at the boundary - used for ellipsis colouring to match elided text - review + #replay_codes_underlay is the set of codes in effect at the very end of the original underlay + + #review + #replay_codes_overlay is the set of codes in effect at the very end of the original overlay (even if not all overlay was applied) + #todo - replay_codes for gx0 mode + + #overflow_idx may change during ansi & character processing + if {$overflow_idx == -1} { + set overflow_right_column "" + } else { + set overflow_right_column [expr {$overflow_idx+1}] + } + set result [tcl::dict::create\ + result $outstring\ + visualwidth [punk::ansi::printing_length $outstring]\ + instruction $instruction\ + stringlen [string length $outstring]\ + overflow_right_column $overflow_right_column\ + overflow_right $overflow_right\ + unapplied $unapplied\ + unapplied_list $unapplied_list\ + insert_mode $insert_mode\ + autowrap_mode $autowrap_mode\ + insert_lines_above $insert_lines_above\ + insert_lines_below $insert_lines_below\ + cursor_saved_position $cursor_saved_position\ + cursor_saved_attributes $cursor_saved_attributes\ + cursor_column $cursor_column\ + cursor_row $cursor_row\ + opt_overflow $opt_overflow\ + replay_codes $replay_codes\ + replay_codes_underlay $replay_codes_underlay\ + replay_codes_overlay $replay_codes_overlay\ + ] + if {$opt_returnextra == 1} { + return $result + } else { + #human/debug - map special chars to visual glyphs + set viewop VIEW + switch -- $opt_returnextra { + 2 { + #codes and character data + set viewop VIEWCODES ;#ansi colorisation of codes - green for SGR, blue/blue reverse for cursor_save/cursor_restore, cyan for movements, orange for others + } + 3 { + set viewop VIEWSTYLE ;#ansi colorise the characters within the output with preceding codes, stacking codes only within each dict value - may not be same SGR effect as the effect in-situ. + } + } + tcl::dict::set result result [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result result]] + tcl::dict::set result overflow_right [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result overflow_right]] + tcl::dict::set result unapplied [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied]] + tcl::dict::set result unapplied_list [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied_list]] + tcl::dict::set result replay_codes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes]] + tcl::dict::set result replay_codes_underlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_underlay]] + tcl::dict::set result replay_codes_overlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_overlay]] + tcl::dict::set result cursor_saved_attributes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result cursor_saved_attributes]] + return $result + } + } else { + return $outstring + } + #return [join $out ""] + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace overtype ---}] +} + +tcl::namespace::eval overtype::piper { + proc overcentre {args} { + if {[llength $args] < 2} { + error {usage: ?-bias left|right? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::centre {*}$argsflags $under $over + } + proc overleft {args} { + if {[llength $args] < 2} { + error {usage: ?-startcolumn ? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::left {*}$argsflags $under $over + } +} + + +# -- --- --- --- --- --- --- --- --- --- --- +proc overtype::transparentline {args} { + foreach {under over} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + set defaults [tcl::dict::create\ + -transparent 1\ + -exposed 1 " "\ + -exposed 2 " "\ + ] + set newargs [tcl::dict::merge $defaults $argsflags] + tailcall overtype::renderline {*}$newargs $under $over +} +#renderline may not make sense as it is in the long run for blocks of text - but is handy in the single-line-handling form anyway. +# We are trying to handle ansi codes in a block of text which is acting like a mini-terminal in some sense. +#We can process standard cursor moves such as \b \r - but no way to respond to other cursor movements e.g moving to other lines. +# +tcl::namespace::eval overtype::piper { + proc renderline {args} { + if {[llength $args] < 2} { + error {usage: ?-start ? ?-transparent [0|1|]? ?-overflow [1|0]? overtext pipelinedata} + } + foreach {over under} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + tailcall overtype::renderline {*}$argsflags $under $over + } +} +interp alias "" piper_renderline "" overtype::piper::renderline + +#intended for single grapheme - but will work for multiple +#cannot contain ansi or newlines +#(a cache of ansifreestring_width calls - as these are quite regex heavy) +proc overtype::grapheme_width_cached {ch} { + variable grapheme_widths + if {[tcl::dict::exists $grapheme_widths $ch]} { + return [tcl::dict::get $grapheme_widths $ch] + } + set width [punk::char::ansifreestring_width $ch] + tcl::dict::set grapheme_widths $ch $width + return $width +} + + + +proc overtype::test_renderline {} { + set t \uFF5E ;#2-wide tilde + set u \uFF3F ;#2-wide underscore + set missing \uFFFD + return [list $t $u A${t}B] +} + +#maintenance warning +#same as textblock::size - but we don't want that circular dependency +#block width and height can be tricky. e.g \v handled differently on different terminal emulators and can affect both +proc overtype::blocksize {textblock} { + if {$textblock eq ""} { + return [tcl::dict::create width 0 height 1] ;#no such thing as zero-height block - for consistency with non-empty strings having no line-endings + } + if {[tcl::string::first \t $textblock] >= 0} { + if {[info exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set textblock [textutil::tabify::untabify2 $textblock $tw] + } + #stripansi on entire block in one go rather than line by line - result should be the same - review - make tests + if {[punk::ansi::ta::detect $textblock]} { + set textblock [punk::ansi::stripansi $textblock] + } + if {[tcl::string::last \n $textblock] >= 0} { + set num_le [expr {[tcl::string::length $textblock]-[tcl::string::length [tcl::string::map {\n {}} $textblock]]}] ;#faster than splitting into single-char list + set width [tcl::mathfunc::max {*}[lmap v [split $textblock \n] {::punk::char::ansifreestring_width $v}]] + } else { + set num_le 0 + set width [punk::char::ansifreestring_width $textblock] + } + #our concept of block-height is likely to be different to other line-counting mechanisms + set height [expr {$num_le + 1}] ;# one line if no le - 2 if there is one trailing le even if no data follows le + + return [tcl::dict::create width $width height $height] ;#maintain order in 'image processing' standard width then height - caller may use lassign [dict values [blocksize ]] width height +} + +tcl::namespace::eval overtype::priv { + variable cache_is_sgr [tcl::dict::create] + + #we are likely to be asking the same question of the same ansi codes repeatedly + #caching the answer saves some regex expense - possibly a few uS to lookup vs under 1uS + #todo - test if still worthwhile after a large cache is built up. (limit cache size?) + proc is_sgr {code} { + variable cache_is_sgr + if {[tcl::dict::exists $cache_is_sgr $code]} { + return [tcl::dict::get $cache_is_sgr $code] + } + set answer [punk::ansi::codetype::is_sgr $code] + tcl::dict::set cache_is_sgr $code $answer + return $answer + } + proc render_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar unapplied_list unapplied_list ;#maintaining as a list allows caller to utilize it without having to re-split + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + set unapplied_list [list] + #append unapplied [join [lindex $overstacks $idx_over] ""] + #append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $overstacks $idx_over]] + set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + if {$sgr_merged ne ""} { + lappend unapplied_list $sgr_merged + } + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + lappend unapplied_list "\x1b(0" + } + "gx0_off" { + lappend unapplied_list "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + } + set unapplied [join $unapplied_list ""] + } + + #clearer - renders the specific gci forward as unapplied - prefixed with it's merged sgr stack + proc render_this_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar unapplied_list unapplied_list + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + set unapplied_list [list] + + set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + if {$sgr_merged ne ""} { + lappend unapplied_list $sgr_merged + } + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + lappend unapplied_list "\x1b(0" + } + "gx0_off" { + lappend unapplied_list "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + } + set unapplied [join $unapplied_list ""] + } + proc render_delchar {i} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + set nxt [llength $o] + if {$i < $nxt} { + set o [lreplace $o $i $i] + set ustacks [lreplace $ustacks $i $i] + set gxstacks [lreplace $gxstacks $i $i] + } else { + + } + } + proc render_erasechar {i count} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + #ECH clears character attributes from erased character positions + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + if {![tcl::string::is integer -strict $count] || $count < 1} { + error "render_erasechar count must be integer >= 1" + } + set start $i + set end [expr {$i + $count -1}] + #we restrict ECH to current line - as some terminals do - review - is that the only way it's implemented? + if {$i > [llength $o]-1} { + return + } + if {$end > [llength $o]-1} { + set end [expr {[llength $o]-1}] + } + set num [expr {$end - $start + 1}] + set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + return + } + proc render_setchar {i c } { + upvar outcols o + lset o $i $c + } + #is actually addgrapheme? + proc render_addchar {i c sgrstack gx0stack {insert_mode 0}} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + + if 0 { + if {$c eq "c"} { + puts "i:$i c:$c sgrstack:[ansistring VIEW $sgrstack]" + puts "understacks:[ansistring VIEW $ustacks]" + upvar overstacks overstacks + puts "overstacks:[ansistring VIEW $overstacks]" + puts "info level 0:[info level 0]" + } + } + + set nxt [llength $o] + if {!$insert_mode} { + if {$i < $nxt} { + #These lists must always be in sync + lset o $i $c + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + lset ustacks $i $sgrstack + lset gxstacks $i $gx0stack + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } else { + #insert of single-width vs double-width when underlying is double-width? + if {$i < $nxt} { + set o [linsert $o $i $c] + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + set ustacks [linsert $ustacks $i $sgrstack] + set gxstacks [linsert $gxstacks $i $gx0stack] + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } + } + +} + + + +# -- --- --- --- --- --- --- --- --- --- --- +tcl::namespace::eval overtype { + interp alias {} ::overtype::center {} ::overtype::centre +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide overtype [tcl::namespace::eval overtype { + variable version + set version 1.6.4 +}] +return + +#*** !doctools +#[manpage_end] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm new file mode 100644 index 00000000..143794fb --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/overtype-1.6.5.tm @@ -0,0 +1,3632 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) Julian Noble 2003-2023 +# +# @@ Meta Begin +# Application overtype 1.6.5 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin overtype_module_overtype 0 1.6.5] +#[copyright "2024"] +#[titledesc {overtype text layout - ansi aware}] [comment {-- Name section and table of contents description --}] +#[moddesc {overtype text layout}] [comment {-- Description at end of page heading --}] +#[require overtype] +#[keywords module text ansi] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of overtype +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by overtype +#[list_begin itemized] + +package require Tcl 8.6- +package require textutil +package require punk::lib ;#required for lines_as_list +package require punk::ansi ;#required to detect, split, strip and calculate lengths +package require punk::char ;#box drawing - and also unicode character width determination for proper layout of text with double-column-width chars +package require punk::assertion +#*** !doctools +#[item] [package {Tcl 8.6}] +#[item] [package textutil] +#[item] [package punk::ansi] +#[para] - required to detect, split, strip and calculate lengths of text possibly containing ansi codes +#[item] [package punk::char] +#[para] - box drawing - and also unicode character width determination for proper layout of text with double-column-width chars + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +#PERFORMANCE notes +#overtype is very performance sensitive - used in ansi output all over the place ie needs to be optimised +#NOTE use of tcl::dict::for tcl::string::range etc instead of ensemble versions. This is for the many tcl 8.6/8.7 interps which don't compile ensemble commands when in safe interps +#similar for tcl::namespace::eval - but this is at least on some versions of Tcl - faster even in a normal interp. Review to see if that holds for Tcl 9. +#for string map: when there are exactly 2 elements - it is faster to use a literal which has a special case optimisation in the c code +#ie use tcl::string::map {\n ""} ... instead of tcl::string::map [list \n ""] ... +#note that we can use unicode (e.g \uFF31) and other escapes such as \t within these curly braces - we don't have to use double quotes +#generally using 'list' is preferred for the map as less error prone. +#can also use: tcl::string::map "token $var" .. but be careful regarding quoting and whitespace in var. This should be used sparingly if at all. + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section API] + + +#Julian Noble - 2003 +#Released under standard 'BSD license' conditions. +# +#todo - ellipsis truncation indicator for center,right + +#v1.4 2023-07 - naive ansi color handling - todo - fix tcl::string::range +# - need to extract and replace ansi codes? + +tcl::namespace::eval overtype { + namespace import ::punk::assertion::assert + punk::assertion::active true + + namespace path ::punk::lib + + namespace export * + variable default_ellipsis_horizontal "..." ;#fallback + variable default_ellipsis_vertical "..." + tcl::namespace::eval priv { + proc _init {} { + upvar ::overtype::default_ellipsis_horizontal e_h + upvar ::overtype::default_ellipsis_vertical e_v + set e_h [format %c 0x2026] ;#Unicode Horizontal Ellipsis + set e_v [format %c 0x22EE] + #The unicode ellipsis looks more natural than triple-dash which is centred vertically whereas ellipsis is at floorline of text + #Also - unicode ellipsis has semantic meaning that other processors can interpret + #unicode does also provide a midline horizontal ellipsis 0x22EF + + #set e [format %c 0x2504] ;#punk::char::charshort boxd_ltdshhz - Box Drawings Light Triple Dash Horizontal + #if {![catch {package require punk::char}]} { + # set e [punk::char::charshort boxd_ltdshhz] + #} + } + } + priv::_init +} +proc overtype::about {} { + return "Simple text formatting. Author JMN. BSD-License" +} + +tcl::namespace::eval overtype { + variable grapheme_widths [tcl::dict::create] + + variable escape_terminals + #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). + tcl::dict::set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] + #tcl::dict::set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic + tcl::dict::set escape_terminals OSC [list \007 \033\\] ;#note mix of 1 and 2-byte terminals + + #self-contained 2 byte ansi escape sequences - review more? + variable ansi_2byte_codes_dict + set ansi_2byte_codes_dict [tcl::dict::create\ + "reset_terminal" "\u001bc"\ + "save_cursor_posn" "\u001b7"\ + "restore_cursor_posn" "\u001b8"\ + "cursor_up_one" "\u001bM"\ + "NEL - Next Line" "\u001bE"\ + "IND - Down one line" "\u001bD"\ + "HTS - Set Tab Stop" "\u001bH"\ + ] + + #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. + # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ +} + + + + +proc overtype::string_columns {text} { + if {[punk::ansi::ta::detect $text]} { + #error "error string_columns is for calculating character length of string - ansi codes must be stripped/rendered first e.g with punk::ansi::ansistrip. Alternatively try punk::ansi::printing_length" + set text [punk::ansi::ansistrip $text] + } + return [punk::char::ansifreestring_width $text] +} + +#todo - consider a way to merge overtype::left/centre/right +#These have similar algorithms/requirements - and should be refactored to be argument-wrappers over a function called something like overtype::renderblock +#overtype::renderblock could render the input to a defined (possibly overflowing in x or y) rectangle overlapping the underlay. +#(i.e not even necessariy having it's top left within the underlay) +tcl::namespace::eval overtype::priv { +} + +#could return larger than colwidth +proc _get_row_append_column {row} { + upvar outputlines outputlines + set idx [expr {$row -1}] + if {$row <= 1 || $row > [llength $outputlines]} { + return 1 + } else { + upvar opt_overflow opt_overflow + upvar colwidth colwidth + set existinglen [punk::ansi::printing_length [lindex $outputlines $idx]] + set endpos [expr {$existinglen +1}] + if {$opt_overflow} { + return $endpos + } else { + if {$endpos > $colwidth} { + return $colwidth + 1 + } else { + return $endpos + } + } + } +} + +tcl::namespace::eval overtype { + #*** !doctools + #[subsection {Namespace overtype}] + #[para] Core API functions for overtype + #[list_begin definitions] + + + + #tcl::string::range should generally be avoided for both undertext and overtext which contain ansi escapes and other cursor affecting chars such as \b and \r + #render onto an already-rendered (ansi already processed) 'underlay' string, a possibly ansi-laden 'overlay' string. + #The underlay and overlay can be multiline blocks of text of varying line lengths. + #The overlay may just be an ansi-colourised block - or may contain ansi cursor movements and cursor save/restore calls - in which case the apparent length and width of the overlay can't be determined as if it was a block of text. + #This is a single-shot rendering of strings - ie there is no way to chain another call containing a cursor-restore to previously rendered output and have it know about any cursor-saves in the first call. + # a cursor start position other than top-left is a possible addition to consider. + #see editbuf in punk::repl for a more stateful ansi-processor. Both systems use loops over overtype::renderline + proc renderspace {args} { + #*** !doctools + #[call [fun overtype::renderspace] [arg args] ] + #[para] usage: ?-transparent [lb]0|1[rb]? ?-overflow [lb]1|0[rb]? ?-ellipsis [lb]1|0[rb]? ?-ellipsistext ...? undertext overtext + + # @c overtype starting at left (overstrike) + # @c can/should we use something like this?: 'format "%-*s" $len $overtext + variable default_ellipsis_horizontal + + if {[llength $args] < 2} { + error {usage: ?-width ? ?-startcolumn ? ?-transparent [0|1|]? ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} + } + lassign [lrange $args end-1 end] underblock overblock + set opts [tcl::dict::create\ + -bias ignored\ + -width \uFFEF\ + -height \uFFEF\ + -startcolumn 1\ + -wrap 0\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -appendlines 1\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -experimental 0\ + -looplimit \uFFEF\ + ] + #-ellipsis args not used if -wrap is true + set argsflags [lrange $args 0 end-2] + foreach {k v} $argsflags { + switch -- $k { + -looplimit - -width - -height - -startcolumn - -bias - -wrap - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -appendlines - -transparent - -exposed1 - -exposed2 - -experimental { + tcl::dict::set opts $k $v + } + default { + error "overtype::renderspace unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_overflow [tcl::dict::get $opts -overflow] + ##### + # review -wrap should map onto DECAWM terminal mode - the wrap 2 idea may not fit in with this?. + set opt_wrap [tcl::dict::get $opts -wrap] ;#wrap 1 is hard wrap cutting word at exact column, or 1 column early for 2w-glyph, wrap 2 is for language-based word-wrap algorithm (todo) + ##### + #for repl - standard output line indicator is a dash - todo, add a different indicator for a continued line. + set opt_width [tcl::dict::get $opts -width] + set opt_height [tcl::dict::get $opts -height] + set opt_startcolumn [tcl::dict::get $opts -startcolumn] + set opt_appendlines [tcl::dict::get $opts -appendlines] + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_exposed1 [tcl::dict::get $opts -exposed1] ;#widechar_exposed_left - todo + set opt_exposed2 [tcl::dict::get $opts -exposed2] ;#widechar_exposed_right - todo + # -- --- --- --- --- --- + + # ---------------------------- + # -experimental dev flag to set flags etc + # ---------------------------- + set data_mode 0 + set test_mode 1 + set info_mode 0 + set edit_mode 0 + set opt_experimental [tcl::dict::get $opts -experimental] + foreach o $opt_experimental { + switch -- $o { + test_mode { + set test_mode 1 + set info_mode 1 + } + old_mode { + set test_mode 0 + set info_mode 1 + } + data_mode { + set data_mode 1 + } + info_mode { + set info_mode 1 + } + edit_mode { + set edit_mode 1 + } + } + } + set test_mode 1 ;#try to eliminate + # ---------------------------- + + #modes + set insert_mode 0 ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l + set autowrap_mode $opt_wrap + set reverse_mode 0 + + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + + #set underlines [split $underblock \n] + + #underblock is a 'rendered' block - so width height make sense + #colwidth & colheight were originally named with reference to rendering into a 'column' of output e.g a table column - before cursor row/col was implemented. + #The naming is now confusing. It should be something like renderwidth renderheight ?? review + + if {$opt_width eq "\uFFEF" || $opt_height eq "\uFFEF"} { + lassign [blocksize $underblock] _w colwidth _h colheight + if {$opt_width ne "\uFFEF"} { + set colwidth $opt_width + } + if {$opt_height ne "\uFFEF"} { + set colheight $opt_height + } + } else { + set colwidth $opt_width + set colheight $opt_height + } + + # -- --- --- --- + #REVIEW - do we need ansi resets in the underblock? + if {$underblock eq ""} { + set underlines [lrepeat $colheight ""] + } else { + set underlines [split $underblock \n] + } + #if {$underblock eq ""} { + # set blank "\x1b\[0m\x1b\[0m" + # #set underlines [list "\x1b\[0m\x1b\[0m"] + # set underlines [lrepeat $colheight $blank] + #} else { + # #lines_as_list -ansiresets 1 will do nothing if -ansiresets 1 isn't specified - REVIEW + # set underlines [lines_as_list -ansiresets 1 $underblock] + #} + # -- --- --- --- + + #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. + #In practice an overlay ANSI stream can be a single line with ansi moves/restores etc - or even have no moves or newlines, just relying on wrapping at the output colwidth + #In such cases - we process the whole shebazzle for the first output line - only reducing by the applied amount at the head each time, reprocessing the long tail each time. + #(in cases where there are interline moves or cursor jumps anyway) + #This works - but doesn't seem efficient. + #On the other hand.. maybe it depends on the data. For simpler files it's more efficient than splitting first + + #a hack until we work out how to avoid infinite loops... + # + set looplimit [tcl::dict::get $opts -looplimit] + if {$looplimit eq "\uFFEF"} { + #looping for each char is worst case (all newlines?) - anything over that is an indication of something broken? + #do we need any margin above the length? (telnet mapscii.me test) + set looplimit [expr {[tcl::string::length $overblock] + 10}] + } + + if {!$test_mode} { + set inputchunks [split $overblock \n] + } else { + set scheme 3 + switch -- $scheme { + 0 { + #one big chunk + set inputchunks [list $overblock] + } + 1 { + set inputchunks [punk::ansi::ta::split_codes $overblock] + } + 2 { + + #split into lines if possible first - then into plaintext/ansi-sequence chunks ? + set inputchunks [list ""] ;#put an empty plaintext split in for starters + set i 1 + set lines [split $overblock \n] + foreach ln $lines { + if {$i < [llength $lines]} { + append ln \n + } + set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? + set lastpt [lindex $inputchunks end] + lset inputchunks end [tcl::string::cat $lastpt [lindex $sequence_split 0]] + lappend inputchunks {*}[lrange $sequence_split 1 end] + incr i + } + } + 3 { + #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice + set lflines [list] + set inputchunks [split $overblock \n] + foreach ln $inputchunks { + append ln \n + lappend lflines $ln + } + if {[llength $lflines]} { + lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] + } + set inputchunks $lflines[unset lflines] + + } + } + } + + + #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height + #lassign [blocksize $overblock] _w overblock_width _h overblock_height + + + set replay_codes_underlay [tcl::dict::create 1 ""] + #lappend replay_codes_overlay "" + set replay_codes_overlay "" + set unapplied "" + set cursor_saved_position [tcl::dict::create] + set cursor_saved_attributes "" + + + set outputlines $underlines + set overidx 0 + + #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext + set row 1 + if {$data_mode} { + set col [_get_row_append_column $row] + } else { + set col $opt_startcolumn + } + + set instruction_stats [tcl::dict::create] + + set loop 0 + #while {$overidx < [llength $inputchunks]} { } + + while {[llength $inputchunks]} { + #set overtext [lindex $inputchunks $overidx]; lset inputchunks $overidx "" + set overtext [lpop inputchunks 0] + if {![tcl::string::length $overtext]} { + incr loop + continue + } + #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" + set undertext [lindex $outputlines [expr {$row -1}]] + set renderedrow $row + + #renderline pads each underaly line to width with spaces and should track where end of data is + + + #set overtext [tcl::string::cat [lindex $replay_codes_overlay $overidx] $overtext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + if {[tcl::dict::exists $replay_codes_underlay $row]} { + set undertext [tcl::string::cat [tcl::dict::get $replay_codes_underlay $row] $undertext] + } + #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - + #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l + set LASTCALL [list -info 1 -insert_mode $insert_mode -autowrap_mode $autowrap_mode -transparent $opt_transparent -width $colwidth -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -cursor_column $col -cursor_row $row $undertext $overtext] + set rinfo [renderline -experimental $opt_experimental\ + -info 1\ + -insert_mode $insert_mode\ + -cursor_restore_attributes $cursor_saved_attributes\ + -autowrap_mode $autowrap_mode\ + -transparent $opt_transparent\ + -width $colwidth\ + -exposed1 $opt_exposed1\ + -exposed2 $opt_exposed2\ + -overflow $opt_overflow\ + -cursor_column $col\ + -cursor_row $row\ + $undertext\ + $overtext\ + ] + set instruction [tcl::dict::get $rinfo instruction] + set insert_mode [tcl::dict::get $rinfo insert_mode] + set autowrap_mode [tcl::dict::get $rinfo autowrap_mode] ;# + #set reverse_mode [tcl::dict::get $rinfo reverse_mode];#how to support in rendered linelist? we need to examine all pt/code blocks and flip each SGR stack? + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set overflow_right_column [tcl::dict::get $rinfo overflow_right_column] + set unapplied [tcl::dict::get $rinfo unapplied] + set unapplied_list [tcl::dict::get $rinfo unapplied_list] + set post_render_col [tcl::dict::get $rinfo cursor_column] + set post_render_row [tcl::dict::get $rinfo cursor_row] + set c_saved_pos [tcl::dict::get $rinfo cursor_saved_position] + set c_saved_attributes [tcl::dict::get $rinfo cursor_saved_attributes] + set visualwidth [tcl::dict::get $rinfo visualwidth] + set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] + set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] + #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + + + + #-- todo - detect looping properly + if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { + puts stderr "overtype::renderspace loop?" + puts [ansistring VIEW $rinfo] + break + } + #-- + + if {[tcl::dict::size $c_saved_pos] >= 1} { + set cursor_saved_position $c_saved_pos + set cursor_saved_attributes $c_saved_attributes + } + + + set overflow_handled 0 + + + + set nextprefix "" + + + #todo - handle potential insertion mode as above for cursor restore? + #keeping separate branches for debugging - review and merge as appropriate when stable + tcl::dict::incr instruction_stats $instruction + switch -- $instruction { + {} { + if {$test_mode == 0} { + incr row + if {$data_mode} { + set col [_get_row_append_column $row] + if {$col > $colwidth} { + + } + } else { + set col 1 + } + } else { + #lf included in data + set row $post_render_row + set col $post_render_col + + #set col 1 + #if {$post_render_row != $renderedrow} { + # set col 1 + #} else { + # set col $post_render_col + #} + } + } + up { + + #renderline knows it's own line number, and knows not to go above row l + #it knows that a move whilst 1-beyond the width conflicts with the linefeed and reduces the move by one accordingly. + #row returned should be correct. + #column may be the overflow column - as it likes to report that to the caller. + + #Note that an ansi up sequence after last column going up to a previous line and also beyond the last column, will result in the next grapheme going onto the following line. + #this seems correct - as the column remains beyond the right margin so subsequent chars wrap (?) review + #puts stderr "up $post_render_row" + #puts stderr "$rinfo" + + #puts stdout "1 row:$row col $col" + set row $post_render_row + #data_mode (naming?) determines if we move to end of existing data or not. + #data_mode 0 means ignore existing line length and go to exact column + #set by -experimental flag + if {$data_mode == 0} { + set col $post_render_col + } else { + #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data + #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + + #puts stdout "2 row:$row col $col" + #puts stdout "-----------------------" + #puts stdout $rinfo + #flush stdout + } + down { + if {$data_mode == 0} { + #renderline doesn't know how far down we can go.. + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set row $post_render_row + set col $post_render_col + } else { + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + + } + } + restore_cursor { + #testfile belinda.ans uses this + + #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" + if {[tcl::dict::exists $cursor_saved_position row]} { + set row [tcl::dict::get $cursor_saved_position row] + set col [tcl::dict::get $cursor_saved_position column] + #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" + #set nextprefix $cursor_saved_attributes + #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay]$cursor_saved_attributes + #set replay_codes_overlay $cursor_saved_attributes + set cursor_saved_position [tcl::dict::create] + set cursor_saved_attributes "" + } else { + #TODO + #?restore without save? + #should move to home position and reset ansi SGR? + #puts stderr "overtype::renderspace cursor_restore without save data available" + } + #If we were inserting prior to hitting the cursor_restore - there could be overflow_right data - generally the overtype functions aren't for inserting - but ansi can enable it + #if we were already in overflow when cursor_restore was hit - it shouldn't have been processed as an action - just stored. + if {!$overflow_handled && $overflow_right ne ""} { + #wrap before restore? - possible effect on saved cursor position + #this overflow data has previously been rendered so has no cursor movements or further save/restore operations etc + #we can just insert another call to renderline to solve this.. ? + #It would perhaps be more properly handled as a queue of instructions from our initial renderline call + #we don't need to worry about overflow next call (?)- but we should carry forward our gx and ansi stacks + + puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" + + set sub_info [overtype::renderline -info 1 -width $colwidth -insert_mode $insert_mode -autowrap_mode $autowrap_mode -overflow [tcl::dict::get $opts -overflow] "" $overflow_right] + set foldline [tcl::dict::get $sub_info result] + set insert_mode [tcl::dict::get $sub_info insert_mode] ;#probably not needed.. + set autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. + linsert outputlines $renderedrow $foldline + #review - row & col set by restore - but not if there was no save.. + } + set overflow_handled 1 + + } + move { + ######## + if {$post_render_row > [llength $outputlines]} { + #Ansi moves need to create new lines ? + #if {$opt_appendlines} { + # set diff [expr {$post_render_row - [llength $outputlines]}] + # if {$diff > 0} { + # lappend outputlines {*}[lrepeat $diff ""] + # } + # set row $post_render_row + #} else { + set row [llength $outputlines] + #} + } else { + set row $post_render_row + } + ####### + set col $post_render_col + #overflow + unapplied? + } + lf_start { + #raw newlines - must be test_mode + # ---------------------- + #test with fruit.ans + #test - treating as newline below... + #append rendered $overflow_right + #set overflow_right "" + set row $renderedrow + incr row + if {$row > [llength $outputlines]} { + lappend outputlines "" + } + set col $opt_startcolumn + # ---------------------- + } + lf_mid { + + if 0 { + #set rhswidth [punk::ansi::printing_length $overflow_right] + #only show debug when we have overflow? + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + + set rhs "" + if {$overflow_right ne ""} { + set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $overflow_right]] + set rhs [textblock::frame -title overflow_right $rhs] + } + puts [textblock::join $lhs " $post_render_col " $rhs] + } + + if {!$test_mode} { + #rendered + append rendered $overflow_right + #set replay_codes_overlay "" + set overflow_right "" + + + set row $renderedrow + + set col $opt_startcolumn + incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + set edit_mode 0 + if {$edit_mode} { + set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] + set overflow_right "" + set unapplied "" + set row $post_render_row + #set col $post_render_col + set col $opt_startcolumn + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } else { + append rendered $overflow_right + set overflow_right "" + set row $post_render_row + set col $opt_startcolumn + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + } + } + } + lf_overflow { + #linefeed after colwidth e.g at column 81 for an 80 col width + #we may also have other control sequences that came after col 80 e.g cursor save + + if 0 { + set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] + set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] + set rhs "" + + #assertion - there should be no overflow.. + puts $lhs + } + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right + + set row $post_render_row + #set row $renderedrow + #incr row + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat 1 ""] + } + set col $opt_startcolumn + + } + newlines_above { + #we get a newlines_above instruction when received at column 1 + #In some cases we want to treat that as request to insert a new blank line above, and move our row 1 down (staying with the data) + #in other cases - we want to treat at column 1 the same as any other + + puts "--->newlines_above" + puts "rinfo: $rinfo" + #renderline doesn't advance the row for us - the caller has the choice to implement or not + set row $post_render_row + set col $post_render_col + if {$insert_lines_above > 0} { + set row $renderedrow + set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] + incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above + #? set row $post_render_row #can renderline tell us? + } + } + newlines_below { + #obsolete? - use for ANSI insert lines sequence + if {$data_mode == 0} { + puts --->nl_below + set row $post_render_row + set col $post_render_col + if {$insert_lines_below == 1} { + if {$test_mode == 0} { + set row $renderedrow + set outputlines [linsert $outputlines [expr {$renderedrow }] {*}[lrepeat $insert_lines_below ""]] ;#note - linsert can add to end too + incr row $insert_lines_below + set col $opt_startcolumn + } else { + #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] + #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] + #set rhs "" + #if {$overflow_right ne ""} { + # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] + # set rhs [textblock::frame -title overflow_right $rhs] + #} + #puts [textblock::join $lhs $rhs] + + #rendered + append rendered $overflow_right + # + + + set overflow_right "" + set row $renderedrow + #only add newline if we're at the bottom + if {$row > [llength $outputlines]} { + lappend outputlines {*}[lrepeat $insert_lines_below ""] + } + incr row $insert_lines_below + set col $opt_startcolumn + + + + } + } + } else { + set row $post_render_row + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff ""] + } + lappend outputlines "" + } + } else { + set existingdata [lindex $outputlines [expr {$post_render_row -1}]] + set lastdatacol [punk::ansi::printing_length $existingdata] + if {$lastdatacol < $colwidth} { + set col [expr {$lastdatacol+1}] + } else { + set col $colwidth + } + } + } + } + wrapmoveforward { + #doesn't seem to be used by fruit.ans testfile + #used by dzds.ans + #note that cursor_forward may move deep into the next line - or even span multiple lines !TODO + set c $colwidth + set r $post_render_row + if {$post_render_col > $colwidth} { + set i $c + while {$i <= $post_render_col} { + if {$c == $colwidth+1} { + incr r + if {$opt_appendlines} { + if {$r < [llength $outputlines]} { + lappend outputlines "" + } + } + set c $opt_startcolumn + } else { + incr c + } + incr i + } + set col $c + } else { + #why are we getting this instruction then? + puts stderr "wrapmoveforward - test" + set r [expr {$post_render_row +1}] + set c $post_render_col + } + set row $r + set col $c + } + wrapmovebackward { + set c $colwidth + set r $post_render_row + if {$post_render_col < 1} { + set c 1 + set i $c + while {$i >= $post_render_col} { + if {$c == 0} { + if {$r > 1} { + incr r -1 + set c $colwidth + } else { + #leave r at 1 set c 1 + #testfile besthpav.ans first line top left border alignment + set c 1 + break + } + } else { + incr c -1 + } + incr i -1 + } + set col $c + } else { + puts stderr "Wrapmovebackward - but postrendercol >= 1???" + } + set row $r + set col $c + } + overflow { + #normal single-width grapheme overflow + #puts "----normal overflow --- [ansistring VIEWSTYLE -lf 1 -nul 1 -vt 1 $rendered]" + set row $post_render_row ;#renderline will not advance row when reporting overflow char + if {$autowrap_mode} { + incr row + set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? + } else { + #this works for test_mode (which should become the default) - but could give a bad result otherwise - review - add tests fix. + set col $post_render_col + #set unapplied "" ;#this seems wrong? + #set unapplied [tcl::string::range $unapplied 1 end] + #The overflow can only be triggered by a grapheme (todo cluster?) - but our unapplied could contain SGR codes prior to the grapheme that triggered overflow - so we need to skip beyond any SGRs + #There may be more than one, because although the stack leading up to overflow may have been merged - codes between the last column and the overflowing grapheme will remain separate + #We don't expect any movement or other ANSI codes - as if they came before the grapheme, they would have triggered a different instruction to 'overflow' + set idx 0 + set next_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set next_grapheme_index $idx + break + } + incr idx + } + assert {$next_grapheme_index >= 0} + #drop the overflow grapheme - keeping all codes in place. + set unapplied [join [lreplace $unapplied_list $next_grapheme_index $next_grapheme_index] ""] + #we need to run the reduced unapplied on the same line - further graphemes will just overflow again, but codes or control chars could trigger jumps to other lines + + set overflow_handled 1 + #handled by dropping overflow if any + } + } + overflow_splitchar { + set row $post_render_row ;#renderline will not advance row when reporting overflow char + + #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts + #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc + if {$autowrap_mode} { + if {$colwidth < 2} { + #edge case of rendering to a single column output - any 2w char will just cause a loop if we don't substitute with something, or drop the character + set idx 0 + set triggering_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set triggering_grapheme_index $idx + break + } + incr idx + } + set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] + } else { + set col $opt_startcolumn + incr row + } + } else { + set overflow_handled 1 + #handled by dropping entire overflow if any + if {$colwidth < 2} { + set idx 0 + set triggering_grapheme_index -1 + foreach u $unapplied_list { + if {![punk::ansi::ta::detect $u]} { + set triggering_grapheme_index $idx + break + } + incr idx + } + set unapplied [join [lreplace $unapplied_list $triggering_grapheme_index $triggering_grapheme_index $opt_exposed1] ""] + } + } + + } + vt { + + #can vt add a line like a linefeed can? + set row $post_render_row + set col $post_render_col + } + default { + puts stderr "overtype::renderspace unhandled renderline instruction '$instruction'" + } + + } + + + if {!$opt_overflow && !$autowrap_mode} { + #not allowed to overflow column or wrap therefore we get overflow data to truncate + if {[tcl::dict::get $opts -ellipsis]} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + if {[tcl::string::trim [punk::ansi::ansistrip $lostdata]] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + set overflow_handled 1 + } else { + #no wrap - no ellipsis - silently truncate + set overflow_handled 1 + } + } + + + + if {$renderedrow <= [llength $outputlines]} { + lset outputlines [expr {$renderedrow-1}] $rendered + } else { + if {$opt_appendlines} { + lappend outputlines $rendered + } else { + #? + lset outputlines [expr {$renderedrow-1}] $rendered + } + } + + if {!$overflow_handled} { + append nextprefix $overflow_right + } + + append nextprefix $unapplied + + if 0 { + if {$nextprefix ne ""} { + set nextoveridx [expr {$overidx+1}] + if {$nextoveridx >= [llength $inputchunks]} { + lappend inputchunks $nextprefix + } else { + #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] + set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] + } + } + } + + if {$nextprefix ne ""} { + set inputchunks [linsert $inputchunks 0 $nextprefix] + } + + + incr overidx + incr loop + if {$loop >= $looplimit} { + puts stderr "overtype::renderspace looplimit reached ($looplimit)" + lappend outputlines "[a+ red bold] - looplimit $looplimit reached[a]" + set Y [a+ yellow bold] + set RST [a] + set sep_header ----DEBUG----- + set debugmsg "" + append debugmsg "${Y}${sep_header}${RST}" \n + append debugmsg "looplimit $looplimit reached\n" + append debugmsg "test_mode:$test_mode\n" + append debugmsg "data_mode:$data_mode\n" + append debugmsg "opt_appendlines:$opt_appendlines\n" + append debugmsg "prev_row :[tcl::dict::get $LASTCALL -cursor_row]\n" + append debugmsg "prev_col :[tcl::dict::get $LASTCALL -cursor_column]\n" + tcl::dict::for {k v} $rinfo { + append debugmsg "${Y}$k [ansistring VIEW -lf 1 -vt 1 $v]$RST" \n + } + append debugmsg "${Y}[string repeat - [string length $sep_header]]$RST" \n + + puts stdout $debugmsg + #todo - config regarding error dumps rather than just dumping in working dir + set fd [open [pwd]/error_overtype.txt w] + puts $fd $debugmsg + close $fd + error $debugmsg + break + } + } + + set result [join $outputlines \n] + if {$info_mode} { + #emit to debug window like basictelnet does? make debug configurable as syslog or even a telnet server to allow on 2nd window? + #append result \n$instruction_stats\n + } + return $result + } + + #todo - left-right ellipsis ? + proc centre {args} { + variable default_ellipsis_horizontal + if {[llength $args] < 2} { + error {usage: ?-transparent [0|1]? ?-bias [left|right]? ?-overflow [1|0]? undertext overtext} + } + + foreach {underblock overblock} [lrange $args end-1 end] break + + #todo - vertical vs horizontal overflow for blocks + set opts [tcl::dict::create\ + -bias left\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + ] + set argsflags [lrange $args 0 end-2] + foreach {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 { + tcl::dict::set opts $k $v + } + default { + set known_opts [tcl::dict::keys $opts] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + # -- --- --- --- --- --- + + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {$colwidth - $overblock_width}] + if {$under_exposed_max > 0} { + #background block is wider + if {$under_exposed_max % 2 == 0} { + #even left/right exposure + set left_exposed [expr {$under_exposed_max / 2}] + } else { + set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division + if {[tcl::string::tolower [tcl::dict::get $opts -bias]] eq "left"} { + set left_exposed $beforehalf + } else { + #bias to the right + set left_exposed [expr {$beforehalf + 1}] + } + } + } else { + set left_exposed 0 + } + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + set undertext "$undertext[string repeat { } $udiff]" + } + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + #review - right-to-left langs should elide on left! - extra option required + + if {$overflowlength > 0} { + #overlay line wider or equal + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow [tcl::dict::get $opts -overflow] -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + #todo - get replay_codes from overflow_right instead of wherever it was truncated? + + #overlay line data is wider - trim if overflow not specified in opts - and overtype an ellipsis at right if it was specified + if {![tcl::dict::get $opts -overflow]} { + #lappend outputlines [tcl::string::range $overtext 0 [expr {$colwidth - 1}]] + #set overtext [tcl::string::range $overtext 0 $colwidth-1 ] + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + #don't use tcl::string::range on ANSI data + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::right $rendered $opt_ellipsistext] + } + } + } + lappend outputlines $rendered + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent $undertext $overtext] + } else { + #background block is wider than or equal to data for this line + #lappend outputlines [renderline -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + set rinfo [renderline -info 1 -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + return [join $outputlines \n] + } + + #overtype::right is for a rendered ragged underblock and a rendered ragged overblock + #ie we can determine the block width for bost by examining the lines and picking the longest. + # + proc right {args} { + #NOT the same as align-right - which should be done to the overblock first if required + variable default_ellipsis_horizontal + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-overflow [1|0]? ?-transparent 0|? undertext overtext} + } + foreach {underblock overblock} [lrange $args end-1 end] break + + set opts [tcl::dict::create\ + -bias ignored\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -align "left"\ + ] + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -align { + tcl::dict::set opts $k $v + } + default { + set known_opts [tcl::dict::keys $opts] + error "overtype::centre unknown option '$k'. Known options: $known_opts" + } + } + } + #set opts [tcl::dict::merge $defaults $argsflags] + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + set opt_align [tcl::dict::get $opts -align] + # -- --- --- --- --- --- + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] + set left_exposed $under_exposed_max + + + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + #puts xxx + append undertext [string repeat { } $udiff] + } + if {$overtext_datalen < $overblock_width} { + set odiff [expr {$overblock_width - $overtext_datalen}] + switch -- $opt_align { + left { + set startoffset 0 + } + right { + set startoffset $odiff + } + default { + set half [expr {$odiff / 2}] + #set lhs [string repeat { } $half] + #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left + #set rhs [string repeat { } $righthalf] + set startoffset $half + } + } + } else { + set startoffset 0 ;#negative? + } + + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + if {$overflowlength > 0} { + #raw overtext wider than undertext column + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] + set replay_codes [tcl::dict::get $rinfo replay_codes] + set rendered [tcl::dict::get $rinfo result] + if {!$opt_overflow} { + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] + #todo - overflow on left if allign = right?? + set rendered [overtype::right $rendered $ellipsis] + } + } + } + lappend outputlines $rendered + } else { + #padded overtext + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] + #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes [tcl::dict::get $rinfo replay_codes] + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + + return [join $outputlines \n] + } + + proc left {args} { + overtype::block -blockalign left {*}$args + } + #overtype a (possibly ragged) underblock with a (possibly ragged) overblock + proc block {args} { + variable default_ellipsis_horizontal + # @d !todo - implement overflow, length checks etc + + if {[llength $args] < 2} { + error {usage: ?-blockalign left|centre|right? ?-textalign left|centre|right? ?-overflow [1|0]? ?-transparent 0|? undertext overtext} + } + #foreach {underblock overblock} [lrange $args end-1 end] break + lassign [lrange $args end-1 end] underblock overblock + + set opts [tcl::dict::create\ + -ellipsis 0\ + -ellipsistext $default_ellipsis_horizontal\ + -ellipsiswhitespace 0\ + -overflow 0\ + -transparent 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -textalign "left"\ + -textvertical "top"\ + -blockalign "left"\ + -blockalignbias left\ + -blockvertical "top"\ + ] + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -blockalignbias - -ellipsis - -ellipsistext - -ellipsiswhitespace - -overflow - -transparent - -exposed1 - -exposed2 - -textalign - -blockalign - -blockvertical { + tcl::dict::set opts $k $v + } + default { + error "overtype::block unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + # -- --- --- --- --- --- + set opt_transparent [tcl::dict::get $opts -transparent] + set opt_ellipsis [tcl::dict::get $opts -ellipsis] + set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] + set opt_ellipsiswhitespace [tcl::dict::get $opts -ellipsiswhitespace] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + set opt_textalign [tcl::dict::get $opts -textalign] + set opt_blockalign [tcl::dict::get $opts -blockalign] + if {$opt_blockalign eq "center"} { + set opt_blockalign "centre" + } + # -- --- --- --- --- --- + + set underblock [tcl::string::map {\r\n \n} $underblock] + set overblock [tcl::string::map {\r\n \n} $overblock] + + set underlines [split $underblock \n] + #set colwidth [tcl::mathfunc::max {*}[lmap v $underlines {punk::ansi::printing_length $v}]] + lassign [blocksize $underblock] _w colwidth _h colheight + set overlines [split $overblock \n] + #set overblock_width [tcl::mathfunc::max {*}[lmap v $overlines {punk::ansi::printing_length $v}]] + lassign [blocksize $overblock] _w overblock_width _h overblock_height + set under_exposed_max [expr {max(0,$colwidth - $overblock_width)}] + + switch -- $opt_blockalign { + left { + set left_exposed 0 + } + right { + set left_exposed $under_exposed_max + } + centre { + if {$under_exposed_max > 0} { + #background block is wider + if {$under_exposed_max % 2 == 0} { + #even left/right exposure + set left_exposed [expr {$under_exposed_max / 2}] + } else { + set beforehalf [expr {$under_exposed_max / 2}] ;#1 less than half due to integer division + if {[tcl::string::tolower [tcl::dict::get $opts -blockalignbias]] eq "left"} { + set left_exposed $beforehalf + } else { + #bias to the right + set left_exposed [expr {$beforehalf + 1}] + } + } + } else { + set left_exposed 0 + } + } + default { + set left_exposed 0 + } + } + + + + set outputlines [list] + if {[punk::ansi::ta::detect_sgr [lindex $overlines 0]]} { + set replay_codes "[punk::ansi::a]" + } else { + set replay_codes "" + } + set replay_codes_underlay "" + set replay_codes_overlay "" + foreach undertext $underlines overtext $overlines { + set overtext_datalen [punk::ansi::printing_length $overtext] + set ulen [punk::ansi::printing_length $undertext] + if {$ulen < $colwidth} { + set udiff [expr {$colwidth - $ulen}] + #puts xxx + append undertext [string repeat { } $udiff] + } + if {$overtext_datalen < $overblock_width} { + set odiff [expr {$overblock_width - $overtext_datalen}] + switch -- $opt_textalign { + left { + set startoffset 0 + } + right { + set startoffset $odiff + } + default { + set half [expr {$odiff / 2}] + #set lhs [string repeat { } $half] + #set righthalf [expr {$odiff - $half}] ;#remainder - may be one more - so we are biased left + #set rhs [string repeat { } $righthalf] + set startoffset $half + } + } + } else { + set startoffset 0 ;#negative? + } + + set undertext [tcl::string::cat $replay_codes_underlay $undertext] + set overtext [tcl::string::cat $replay_codes_overlay $overtext] + + set overflowlength [expr {$overtext_datalen - $colwidth}] + if {$overflowlength > 0} { + #raw overtext wider than undertext column + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 -overflow $opt_overflow -startcolumn [expr {1 + $startoffset}] $undertext $overtext] + set replay_codes [tcl::dict::get $rinfo replay_codes] + set rendered [tcl::dict::get $rinfo result] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + if {!$opt_overflow} { + if {$opt_ellipsis} { + set show_ellipsis 1 + if {!$opt_ellipsiswhitespace} { + #we don't want ellipsis if only whitespace was lost + #don't use tcl::string::range on ANSI data + #set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + set lostdata "" + if {$overflow_right ne ""} { + append lostdata $overflow_right + } + if {$unapplied ne ""} { + append lostdata $unapplied + } + if {[tcl::string::trim $lostdata] eq ""} { + set show_ellipsis 0 + } + } + if {$show_ellipsis} { + set rendered [overtype::block -blockalign right $rendered $opt_ellipsistext] + } + } + + #if {$opt_ellipsis} { + # set show_ellipsis 1 + # if {!$opt_ellipsiswhitespace} { + # #we don't want ellipsis if only whitespace was lost + # set lostdata [tcl::string::range $overtext end-[expr {$overflowlength-1}] end] + # if {[tcl::string::trim $lostdata] eq ""} { + # set show_ellipsis 0 + # } + # } + # if {$show_ellipsis} { + # set ellipsis [tcl::string::cat $replay_codes $opt_ellipsistext] + # #todo - overflow on left if allign = right?? + # set rendered [overtype::right $rendered $ellipsis] + # } + #} + } + lappend outputlines $rendered + } else { + #padded overtext + #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] + #Note - we still need overflow here - as although the overtext is short - it may oveflow due to the startoffset + set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -overflow $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] + set overflow_right [tcl::dict::get $rinfo overflow_right] + set unapplied [tcl::dict::get $rinfo unapplied] + lappend outputlines [tcl::dict::get $rinfo result] + } + set replay_codes [tcl::dict::get $rinfo replay_codes] + set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] + set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] + } + + return [join $outputlines \n] + } + # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + # renderline written from a left-right line orientation perspective as a first-shot at getting something useful. + # ultimately right-to-left, top-to-bottom and bottom-to-top are probably needed. + # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + # + # + #-returnextra enables returning of overflow and length + #review - use punk::ansi::ta::detect to short-circuit processing and do simpler string calcs as an optimisation? + #review - DECSWL/DECDWL double width line codes - very difficult/impossible to align and compose with other elements + #(could render it by faking it with sixels and a lot of work - find/make a sixel font and ensure it's exactly 2 cols per char) + #todo - review transparency issues with single/double width characters + #bidi - need a base direction and concept of directional runs for RTL vs LTR - may be best handled at another layer? + proc renderline {args} { + #*** !doctools + #[call [fun overtype::renderline] [arg args] ] + #[para] renderline is the core engine for overtype string processing (frames & textblocks), and the raw mode commandline repl for the Tcl Punk Shell + #[para] It is also a central part of an ansi (micro) virtual terminal-emulator of sorts + #[para] This system does a half decent job at rendering 90's ANSI art to manipulable colour text blocks that can be joined & framed for layout display within a unix or windows terminal + #[para] Renderline helps maintain ANSI text styling reset/replay codes so that the styling of one block doesn't affect another. + #[para] Calling on the punk::ansi library - it can coalesce codes to keep the size down. + #[para] It is a giant mess of doing exactly what common wisdom says not to do... lots at once. + #[para] renderline is part of the Unicode and ANSI aware Overtype system which 'renders' a block of text onto a static underlay + #[para] The underlay is generally expected to be an ordered set of lines or a rectangular text block analogous to a terminal screen - but it can also be ragged in line length, or just blank. + #[para] The overlay couuld be similar - in which case it may often be used to overwrite a column or section of the underlay. + #[para] The overlay could however be a sequence of ANSI-laden text that jumps all over the place. + # + #[para] renderline itself only deals with a single line - or sometimes a single character. It is generally called from a loop that does further terminal-like or textblock processing. + #[para] By suppyling the -info 1 option - it can return various fields indicating the state of the render. + #[para] The main 3 are the result, overflow_right, and unapplied. + #[para] Renderline handles cursor movements from either keystrokes or ANSI sequences but for a full system the aforementioned loop will need to be in place to manage the set of lines under manipulation. + + if {[llength $args] < 2} { + error {usage: ?-info 0|1? ?-startcolumn ? ?-cursor_column ? ?-cursor_row |""? ?-transparent [0|1|]? ?-overflow [1|0]? undertext overtext} + } + lassign [lrange $args end-1 end] under over + if {[string first \n $under] >= 0} { + error "overtype::renderline not allowed to contain newlines in undertext" + } + #if {[string first \n $over] >=0 || [string first \n $under] >= 0} { + # error "overtype::renderline not allowed to contain newlines" + #} + + #generally faster to create a new dict in the proc than to use a namespace variable to create dict once and link to variable (2024 8.6/8.7) + set opts [tcl::dict::create\ + -etabs 0\ + -width \uFFEF\ + -overflow 0\ + -transparent 0\ + -startcolumn 1\ + -cursor_column 1\ + -cursor_row ""\ + -insert_mode 1\ + -autowrap_mode 1\ + -reverse_mode 0\ + -info 0\ + -exposed1 \uFFFD\ + -exposed2 \uFFFD\ + -cursor_restore_attributes ""\ + -cp437 0\ + -experimental {}\ + ] + #-cursor_restore_attributes only - for replay stack - position and actual setting/restoring handled by throwback to caller + + #cursor_row, when numeric will allow detection of certain row moves that are still within our row - allowing us to avoid an early return + #An empty string for cursor_row tells us we have no info about our own row context, and to return with an unapplied string if any row move occurs + + #exposed1 and exposed2 for first and second col of underying 2wide char which is truncated by transparency or overflow + #todo - return info about such grapheme 'cuts' in -info structure and/or create option to raise an error + + set argsflags [lrange $args 0 end-2] + tcl::dict::for {k v} $argsflags { + switch -- $k { + -experimental - -cp437 - -width - -overflow - -transparent - -startcolumn - -cursor_column - -cursor_row - -insert_mode - -autowrap_mode - -reverse_mode - -info - -exposed1 - -exposed2 - -cursor_restore_attributes { + tcl::dict::set opts $k $v + } + default { + error "overtype::renderline unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_etabs [tcl::dict::get $opts -etabs] + set opt_overflow [tcl::dict::get $opts -overflow] + set opt_colstart [tcl::dict::get $opts -startcolumn] ;#lhs limit for overlay - an offset to cursor_column - first visible column is 1. 0 or < 0 are before the start of the underlay + set opt_colcursor [tcl::dict::get $opts -cursor_column];#start cursor column relative to overlay + set opt_row_context [tcl::dict::get $opts -cursor_row] + if {[string length $opt_row_context]} { + if {![tcl::string::is integer -strict $opt_row_context] || $opt_row_context <1 } { + error "overtype::renderline -cursor_row must be empty for unspecified/unknown or a non-zero positive integer. received: '$opt_row_context'" + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + #The _mode flags correspond to terminal modes that can be set/reset via escape sequences (e.g DECAWM wraparound mode) + set opt_insert_mode [tcl::dict::get $opts -insert_mode];#should usually be 1 for each new line in editor mode but must be initialised to 1 externally (review) + #default is for overtype + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_autowrap_mode [tcl::dict::get $opts -autowrap_mode] ;#DECAWM - char or movement can go beyond leftmost/rightmost col to prev/next line + set opt_reverse_mode [tcl::dict::get $opts -reverse_mode] ;#DECSNM + # -- --- --- --- --- --- --- --- --- --- --- --- + set temp_cursor_saved [tcl::dict::get $opts -cursor_restore_attributes] + + set test_mode 0 + set cp437_glyphs [tcl::dict::get $opts -cp437] + foreach e [tcl::dict::get $opts -experimental] { + switch -- $e { + test_mode { + set test_mode 1 + set cp437_glyphs 1 + } + } + } + set test_mode 1 ;#try to elminate + set cp437_map [tcl::dict::create] + if {$cp437_glyphs} { + set cp437_map [set ::punk::ansi::cp437_map] + #for cp437 images we need to map these *after* splitting ansi + #some old files might use newline for its glyph.. but we can't easily support that. + #Not sure how old files did it.. maybe cr lf in sequence was newline and any lone cr or lf were displayed as glyphs? + tcl::dict::unset cp437_map \n + } + + set opt_transparent [tcl::dict::get $opts -transparent] + if {$opt_transparent eq "0"} { + set do_transparency 0 + } else { + set do_transparency 1 + if {$opt_transparent eq "1"} { + set opt_transparent {[\s]} + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_returnextra [tcl::dict::get $opts -info] + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_exposed1 [tcl::dict::get $opts -exposed1] + set opt_exposed2 [tcl::dict::get $opts -exposed2] + # -- --- --- --- --- --- --- --- --- --- --- --- + + if {$opt_row_context eq ""} { + set cursor_row 1 + } else { + set cursor_row $opt_row_context + } + + + #----- + # + if {[info exists punk::console::tabwidth]} { + #punk console is updated if punk::console::set_tabstop_width is used or rep is started/restarted + #It is way too slow to test the current width by querying the terminal here - so it could conceivably get out of sync + #todo - we also need to handle the new threaded repl where console config is in a different thread. + # - also - concept of sub-regions being mini-consoles with their own settings - 'id' for console, or use in/out channels as id? + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + + set overdata $over + if {!$cp437_glyphs} { + #REVIEW! tabify will give different answers for an ANSI colourised string vs plain text + if {!$opt_etabs} { + if {[string first \t $under] >= 0} { + #set under [textutil::tabify::untabify2 $under] + set under [textutil::tabify::untabifyLine $under $tw] + } + if {[string first \t $over] >= 0} { + #set overdata [textutil::tabify::untabify2 $over] + set overdata [textutil::tabify::untabifyLine $over $tw] + } + } + } + #------- + + #ta_detect ansi and do simpler processing? + + #we repeat tests for grapheme width in different loops - rather than create another datastructure to store widths based on column, + #we'll use the grapheme_width_cached function as a lookup table of all graphemes encountered - as there will often be repeats in different positions anyway. + + # -- --- --- --- --- --- --- --- + if {$under ne ""} { + if {[punk::ansi::ta::detect $under]} { + set undermap [punk::ansi::ta::split_codes_single $under] + } else { + #single plaintext part + set undermap [list $under] + } + } else { + set undermap [list] + } + set understacks [list] + set understacks_gx [list] + + set i_u -1 ;#underlay may legitimately be empty + set undercols [list] + set u_codestack [list] + #u_gx_stack probably isn't really a stack - I don't know if g0 g1 can stack or not - for now we support only g0 anyway + set u_gx_stack [list] ;#separate stack for g0 (g1 g2 g3?) graphics on and off (DEC special graphics) + #set pt_underchars "" ;#for string_columns length calculation for overflow 0 truncation + set remainder [list] ;#for returnextra + foreach {pt code} $undermap { + #pt = plain text + #append pt_underchars $pt + if {$cp437_glyphs} { + set pt [tcl::string::map $cp437_map $pt] + } + foreach grapheme [punk::char::grapheme_split $pt] { + #an ugly but easy hack to serve *some* common case ascii quickly with byte-compiled literal switch - feels dirty. + #.. but even 0.5uS per char (grapheme_width_cached) adds up quickly when stitching lots of lines together. + #todo - test decimal value instead, compare performance + switch -- $grapheme { + " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - + a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - + z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + set width 1 + } + default { + if {$grapheme eq "\u0000"} { + #use null as empty cell representation - review + #use of this will probably collide with some application at some point + #consider an option to set the empty cell character + set width 1 + } else { + set width [grapheme_width_cached $grapheme] + #we still want most controls and other zero-length codepoints such as \u200d (zero width joiner) to stay zero-length + #we substitute lone ESC that weren't captured within ANSI context as a debugging aid to see malformed ANSI + #todo - default to off and add a flag (?) to enable this substitution + set sub_stray_escapes 0 + if {$sub_stray_escapes && $width == 0} { + if {$grapheme eq "\x1b"} { + set gvis [ansistring VIEW $grapheme] + set grapheme $gvis + set width 1 + } + } + } + } + } + #set width [grapheme_width_cached $grapheme] + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + + lappend undercols $grapheme + if {$width > 1} { + #presumably there are no triple-column or wider unicode chars.. until the aliens arrive.(?) + #but what about emoji combinations etc - can they be wider than 2? + #todo - if -etabs enabled - then we treat \t as the width determined by our elastic tabstop + incr i_u + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + lappend undercols "" + } + } + + #underlay should already have been rendered and not have non-sgr codes - but let's retain the check for them and not stack them if other codes are here + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + if {$code ne ""} { + set c1c2 [tcl::string::range $code 0 1] + + set leadernorm [tcl::string::range [tcl::string::map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\( 7GFX\ + ] $c1c2] 0 3];# leadernorm is 1st 2 chars mapped to normalised indicator - or is original 2 chars + + switch -- $leadernorm { + 7CSI - 8CSI { + #need to exclude certain leaders after the lb e.g < for SGR 1006 mouse + #REVIEW - what else could end in m but be mistaken as a normal SGR code here? + set maybemouse "" + if {[tcl::string::index $c1c2 0] eq "\x1b"} { + set maybemouse [tcl::string::index $code 2] + } + + if {$maybemouse ne "<" && [tcl::string::index $code end] eq "m"} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set u_codestack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set u_codestack [list $code] + } else { + #basic simplification first.. straight dups + set dup_posns [lsearch -all -exact $u_codestack $code] ;#-exact because of square-bracket glob chars + set u_codestack [lremove $u_codestack {*}$dup_posns] + lappend u_codestack $code + } + } + } + 7GFX { + switch -- [tcl::string::index $code 2] { + "0" { + set u_gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess + } + B { + set u_gx_stack [list] + } + } + } + default { + + } + + } + + #if {[punk::ansi::codetype::is_sgr_reset $code]} { + # #set u_codestack [list] + #} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + #} elseif {[punk::ansi::codetype::is_sgr $code]} { + #} else { + # #leave SGR stack as is + # if {[punk::ansi::codetype::is_gx_open $code]} { + # } elseif {[punk::ansi::codetype::is_gx_close $code]} { + # } + #} + } + #consider also if there are other codes that should be stacked..? + } + + if {!$test_mode} { + #fill columns to width with spaces, and carry over stacks - we will have to keep track of where the underlying data ends manually - TODO + #Specifying a width is suitable for terminal-like applications and text-blocks + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff " "] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + } else { + #NULL empty cell indicator + if {$opt_width ne "\uFFEF"} { + if {[llength $understacks]} { + set cs $u_codestack + set gs $u_gx_stack + } else { + set cs [list] + set gs [list] + } + if {[llength $undercols]< $opt_width} { + set diff [expr {$opt_width- [llength $undercols]}] + if {$diff > 0} { + lappend undercols {*}[lrepeat $diff "\u0000"] + lappend understacks {*}[lrepeat $diff $cs] + lappend understacks_gx {*}[lrepeat $diff $gs] + } + } + } + + } + if {$opt_width ne "\uFFEF"} { + set colwidth $opt_width + } else { + set colwidth [llength $undercols] + } + + + if 0 { + # ----------------- + # if we aren't extending understacks & understacks_gx each time we incr idx above the undercols length.. this doesn't really serve a purpose + # Review. + # ----------------- + #replay code for last overlay position in input line + # whether or not we get that far - we need to return it for possible replay on next line + if {[llength $understacks]} { + lappend understacks $u_codestack + lappend understacks_gx $u_gx_stack + } else { + #in case overlay onto emptystring as underlay + lappend understacks [list] + lappend understacks_gx [list] + } + # ----------------- + } + + #trailing codes in effect for underlay + if {[llength $u_codestack]} { + #set replay_codes_underlay [join $u_codestack ""] + set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] + } else { + set replay_codes_underlay "" + } + + + # -- --- --- --- --- --- --- --- + #### + #if opt_colstart - we need to build a space (or any singlewidth char ?) padding on the left of the right number of columns. + #this will be processed as transparent - and handle doublewidth underlay characters appropriately + set startpad_overlay [string repeat " " [expr {$opt_colstart -1}]] + append startpad_overlay $overdata ;#overdata with left padding spaces based on col-start under will show through for left-padding portion regardless of -transparency + if {$startpad_overlay ne ""} { + if {[punk::ansi::ta::detect $startpad_overlay]} { + set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] + } else { + #single plaintext part + set overmap [list $startpad_overlay] + } + } else { + set overmap [list] + } + #set overmap [punk::ansi::ta::split_codes_single $startpad_overlay] + #### + + #??? + set colcursor $opt_colstart + #TODO - make a little virtual column object + #we need to refer to column1 or columnmin? or columnmax without calculating offsets due to to startcolumn + #need to lock-down what start column means from perspective of ANSI codes moving around - the offset perspective is unclear and a mess. + + + #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + #as at 2024-02 punk::char::grapheme_split uses these - not aware of more complex graphemes + + set overstacks [list] + set overstacks_gx [list] + + set o_codestack [list]; #SGR codestack (not other codes such as movement,insert key etc) + set o_gxstack [list] + set pt_overchars "" + set i_o 0 + set overlay_grapheme_control_list [list] ;#tag each with g, sgr or other. 'other' are things like cursor-movement or insert-mode or codes we don't recognise/use + #experiment + set overlay_grapheme_control_stacks [list] + foreach {pt code} $overmap { + + #todo - wrap in test for empty pt (we used split_codes_single - and it may be common for sgr sequences to be unmerged and so have empty pts between) + if {$cp437_glyphs} { + set pt [tcl::string::map $cp437_map $pt] + } + append pt_overchars $pt + #will get empty pt between adjacent codes + foreach grapheme [punk::char::grapheme_split $pt] { + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + incr i_o + lappend overlay_grapheme_control_list [list g $grapheme] + lappend overlay_grapheme_control_stacks $o_codestack + } + + #only stack SGR (graphics rendition) codes - not title sets, cursor moves etc + #order of if-else based on assumptions: + # that pure resets are fairly common - more so than leading resets with other info + # that non-sgr codes are not that common, so ok to check for resets before verifying it is actually SGR at all. + if {$code ne ""} { + lappend overlay_grapheme_control_stacks $o_codestack + #there will always be an empty code at end due to foreach on 2 vars with odd-sized list ending with pt (overmap coming from perlish split) + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set o_codestack [list "\x1b\[m"] ;#reset better than empty list - fixes some ansi art issues + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set o_codestack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $o_codestack $code] ;#must be -exact because of square-bracket glob chars + set o_codestack [lremove $o_codestack {*}$dup_posns] + lappend o_codestack $code + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[regexp {\x1b7|\x1b\[s} $code]} { + #experiment + #cursor_save - for the replays review. + #jmn + #set temp_cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + lappend overlay_grapheme_control_list [list other $code] + } elseif {[regexp {\x1b8|\x1b\[u} $code]} { + #experiment + #cursor_restore - for the replays + set o_codestack [list $temp_cursor_saved] + lappend overlay_grapheme_control_list [list other $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set o_gxstack [list "gx0_on"] + lappend overlay_grapheme_control_list [list gx0 gx0_on] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set o_gxstack [list] + lappend overlay_grapheme_control_list [list gx0 gx0_off] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend overlay_grapheme_control_list [list other $code] + } + } + } + + } + #replay code for last overlay position in input line - should take account of possible trailing sgr code after last grapheme + set max_overlay_grapheme_index [expr {$i_o -1}] + lappend overstacks $o_codestack + lappend overstacks_gx $o_gxstack + + #set replay_codes_overlay [join $o_codestack ""] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + + #if {[tcl::dict::exists $overstacks $max_overlay_grapheme_index]} { + # set replay_codes_overlay [join [tcl::dict::get $overstacks $max_overlay_grapheme_index] ""] + #} else { + # set replay_codes_overlay "" + #} + # -- --- --- --- --- --- --- --- + + + #potential problem - combinining diacritics directly following control chars like \r \b + + # -- --- --- + #we need to initialise overflow_idx before any potential row-movements - as they need to perform a loop break and force in_excess to 1 + if {$opt_overflow} { + #somewhat counterintuitively - overflow true means we can have lines as long as we want, but either way there can be excess data that needs to be thrown back to the calling loop. + set overflow_idx -1 + } else { + #overflow zero - we can't grow beyond our column width - so we get ellipsis or truncation + if {$opt_width ne "\uFFEF"} { + set overflow_idx [expr {$opt_width}] + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + } + # -- --- --- + + set outcols $undercols ;#leave undercols as is, outcols can potentially be appended to. + + set unapplied "" ;#if we break for move row (but not for /v ?) + set unapplied_list [list] + + set insert_lines_above 0 ;#return key + set insert_lines_below 0 + set instruction "" + + # -- --- --- + #cursor_save_dec, cursor_restore_dec etc + set cursor_restore_required 0 + set cursor_saved_attributes "" + set cursor_saved_position "" + # -- --- --- + + #set idx 0 ;# line index (cursor - 1) + #set idx [expr {$opt_colstart + $opt_colcursor} -1] + + #idx is the per column output index + set idx [expr {$opt_colcursor -1}] ;#don't use opt_colstart here - we have padded and won't start emitting until idx reaches opt_colstart-1 + #cursor_column is usually one above idx - but we have opt_colstart which is like a margin - todo: remove cursor_column from the following loop and calculate it's offset when breaking or at end. + #(for now we are incrementing/decrementing both in sync - which is a bit silly) + set cursor_column $opt_colcursor + + #idx_over is the per grapheme overlay index + set idx_over -1 + + + #movements only occur within the overlay range. + #an underlay is however not necessary.. e.g + #renderline -overflow 1 "" data + #foreach {pt code} $overmap {} + set insert_mode $opt_insert_mode ;#default 1 + set autowrap_mode $opt_autowrap_mode ;#default 1 + + #set re_mode {\x1b\[\?([0-9]*)(h|l)} ;#e.g DECAWM + #set re_col_move {\x1b\[([0-9]*)(C|D|G)$} + #set re_row_move {\x1b\[([0-9]*)(A|B)$} + #set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} ;# or "f" ? + #set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} + #set re_cursor_save {\x1b\[s$} ;#note probable incompatibility with DECSLRM (set left right margin)! + #set re_cursor_restore {\x1b\[u$} + #set re_cursor_save_dec {\x1b7$} + #set re_cursor_restore_dec {\x1b8$} + #set re_other_single {\x1b(D|M|E)$} + #set re_decstbm {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)r$} ;#DECSTBM set top and bottom margins + + #puts "-->$overlay_grapheme_control_list<--" + #puts "-->overflow_idx: $overflow_idx" + for {set gci 0} {$gci < [llength $overlay_grapheme_control_list]} {incr gci} { + set gc [lindex $overlay_grapheme_control_list $gci] + lassign $gc type item + + #emit plaintext chars first using existing SGR codes from under/over stack as appropriate + #then check if the following code is a cursor movement within the line and adjust index if so + #foreach ch $overlay_graphemes {} + switch -- $type { + g { + set ch $item + incr idx_over; #idx_over (until unapplied reached anyway) is per *grapheme* in the overlay - not per col. + if {($idx < ($opt_colstart -1))} { + incr idx [grapheme_width_cached $ch] + continue + } + #set within_undercols [expr {$idx <= [llength $undercols]-1}] ;#within our active data width + set within_undercols [expr {$idx <= $colwidth-1}] + + #https://www.enigma.com/resources/blog/the-secret-world-of-newline-characters + #\x85 NEL in the c1 control set is treated by some terminal emulators (e.g Hyper) as a newline, + #on some it's invisble but doesn't change the line, on some it's a visible glyph of width 1. + #This is hard to process in any standard manner - but I think the Hyper behaviour of doing what it was intended is perhaps most reasonable + #We will map it to the same behaviour as lf here for now... but we need also to consider the equivalent ANSI sequence: \x1bE + + set chtest [tcl::string::map [list \n \x85 \b \r \v \x7f ] $ch] + #puts --->chtest:$chtest + #specials - each shoud have it's own test of what to do if it happens after overflow_idx reached + switch -- $chtest { + "" { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + if {$idx == 0} { + #puts "---a at col 1" + #linefeed at column 1 + #leave the overflow_idx ;#? review + set instruction lf_start ;#specific instruction for newline at column 1 + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } elseif {$overflow_idx != -1 && $idx == $overflow_idx} { + #linefeed after final column + #puts "---c at overflow_idx=$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_overflow ;#only special treatment is to give it it's own instruction in case caller needs to handle differently + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } else { + #linefeed occurred in middle or at end of text + #puts "---mid-or-end-text-linefeed idx:$idx overflow_idx:$overflow_idx" + incr cursor_row + set overflow_idx $idx ;#override overflow_idx even if it was set to -1 due to opt_overflow = 1|2 + set instruction lf_mid + priv::render_unapplied $overlay_grapheme_control_list $gci + break + } + + } + "" { + #will we want/need to use raw for keypresses in terminal? (terminal with LNM in standard reset mode means enter= this is the usual config for terminals) + #So far we are assuming the caller has translated to and handle above.. REVIEW. + + #consider also the old space-carriagereturn softwrap convention used in some terminals. + #In the context of rendering to a block of text - this works similarly in that the space gets eaten so programs emitting space-cr at the terminal width col will pretty much get what they expect. + set idx [expr {$opt_colstart -1}] + set cursor_column $opt_colstart ;#? + } + "" { + #literal backspace char - not necessarily from keyboard + #review - backspace effect on double-width chars - we are taking a column-editing perspective in overtype + #(important for -transparent option - hence replacement chars for half-exposed etc) + #review - overstrike support as per nroff/less (generally considered an old technology replaced by unicode mechanisms and/or ansi SGR) + if {$idx > ($opt_colstart -1)} { + incr idx -1 + incr cursor_column -1 + } else { + set flag 0 + if $flag { + #review - conflicting requirements? Need a different sequence for destructive interactive backspace? + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction backspace_at_start + break + } + } + } + "" { + #literal del character - some terminals send just this for what is generally expected to be a destructive backspace + #We instead treat this as a pure delete at current cursor position - it is up to the repl or terminal to remap backspace key to a sequence that has the desired effect. + priv::render_delchar $idx + } + "" { + #end processing this overline. rest of line is remainder. cursor for column as is. + #REVIEW - this theoretically depends on terminal's vertical tabulation setting (name?) + #e.g it could be configured to jump down 6 rows. + #On the other hand I've seen indications that some modern terminal emulators treat it pretty much as a linefeed. + #todo? + incr cursor_row + set overflow_idx $idx + #idx_over has already been incremented as this is both a movement-control and in some sense a grapheme + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction vt + break + } + default { + if {$overflow_idx != -1} { + #review - how to check arbitrary length item such as tab is going to overflow .. before we get to overflow_idx? + #call grapheme_width_cached on each ch, or look for tab specifically as it's currently the only known reason to have a grapheme width > 2? + #we need to decide what a tab spanning the overflow_idx means and how it affects wrap etc etc + if {$idx == $overflow_idx-1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 2} { + #review split 2w overflow? + #we don't want to make the decision here to split a 2w into replacement characters at end of line and beginning of next line + #better to consider the overlay char as unable to be applied to the line + #render empty column(?) - and reduce overlay grapheme index by one so that the current ch goes into unapplied + #throwing back to caller with instruction complicates its job - but is necessary to avoid making decsions for it here. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #change the overflow_idx + set overflow_idx $idx + incr idx + incr idx_over -1 ;#set overlay grapheme index back one so that sgr stack from previous overlay grapheme used + priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#note $gci-1 instead of just gci + #throw back to caller's loop - add instruction to caller as this is not the usual case + #caller may for example choose to render a single replacement char to this line and omit the grapheme, or wrap it to the next line + set instruction overflow_splitchar + break + } elseif {$owidth > 2} { + #? tab? + #TODO! + puts stderr "overtype::renderline long overtext grapheme '[ansistring VIEW -lf 1 -vt 1 $ch]' not handled" + #tab of some length dependent on tabstops/elastic tabstop settings? + } + } elseif {$idx >= $overflow_idx} { + #jmn? + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci-1]] + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #don't incr idx beyond the overflow_idx + #idx_over already incremented - decrement so current overlay grapheme stacks go to unapplied + incr idx_over -1 + #priv::render_unapplied $overlay_grapheme_control_list [expr {$gci-1}] ;#back one index here too + priv::render_this_unapplied $overlay_grapheme_control_list $gci ;# + set instruction overflow + break + } + } else { + #review. + #This corresponds to opt_overflow being true (at least until overflow_idx is in some cases forced to a value when throwing back to calling loop) + } + + if {($do_transparency && [regexp $opt_transparent $ch])} { + #pre opt_colstart is effectively transparent (we have applied padding of required number of columns to left of overlay) + if {$idx > [llength $outcols]-1} { + lappend outcols " " + #tcl::dict::set understacks $idx [list] ;#review - use idx-1 codestack? + lset understacks $idx [list] + incr idx + incr cursor_column + } else { + #todo - punk::char::char_width + set g [lindex $outcols $idx] + set uwidth [grapheme_width_cached $g] + if {[lindex $outcols $idx] eq ""} { + #2nd col of 2-wide char in underlay + incr idx + incr cursor_column + } elseif {$uwidth == 0} { + #e.g control char ? combining diacritic ? + incr idx + incr cursor_column + } elseif {$uwidth == 1} { + set owidth [grapheme_width_cached $ch] + incr idx + incr cursor_column + if {$owidth > 1} { + incr idx + incr cursor_column + } + } elseif {$uwidth > 1} { + if {[grapheme_width_cached $ch] == 1} { + if {!$insert_mode} { + #normal singlewide transparent overlay onto double-wide underlay + set next_pt_overchar [tcl::string::index $pt_overchars $idx_over+1] ;#lookahead of next plain-text char in overlay + if {$next_pt_overchar eq ""} { + #special-case trailing transparent - no next_pt_overchar + incr idx + incr cursor_column + } else { + if {[regexp $opt_transparent $next_pt_overchar]} { + incr idx + incr cursor_column + } else { + #next overlay char is not transparent.. first-half of underlying 2wide char is exposed + #priv::render_addchar $idx $opt_exposed1 [tcl::dict::get $overstacks $idx_over] [tcl::dict::get $overstacks_gx $idx_over] $insert_mode + priv::render_addchar $idx $opt_exposed1 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } + } else { + #? todo - decide what transparency even means for insert mode + incr idx + incr cursor_column + } + } else { + #2wide transparency over 2wide in underlay - review + incr idx + incr cursor_column + } + } + } + } else { + + set idxchar [lindex $outcols $idx] + #non-transparent char in overlay or empty cell + if {$idxchar eq "\u0000"} { + #empty/erased cell indicator + set uwidth 1 + } else { + set uwidth [grapheme_width_cached $idxchar] + } + if {$within_undercols} { + if {$idxchar eq ""} { + #2nd col of 2wide char in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 + #JMN - this has to expose if our startposn chopped an underlay - but not if we already overwrote the first half of the widechar underlay grapheme + #e.g renderline \uFF21\uFF21--- a\uFF23\uFF23 + #vs + # renderline -startcolumn 2 \uFF21---- \uFF23 + if {[lindex $outcols $idx-1] != ""} { + #verified it's an empty following a filled - so it's a legit underlay remnant (REVIEW - when would it not be??) + #reset previous to an exposed 1st-half - but leave understacks code as is + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 + } + incr idx + } else { + set prevcolinfo [lindex $outcols $idx-1] + #for insert mode - first replace the empty 2ndhalf char with exposed2 before shifting it right + #REVIEW - this leaves a replacement character permanently in our columns.. but it is consistent regarding length (?) + #The alternative is to disallow insertion at a column cursor that is at 2nd half of 2wide char + #perhaps by inserting after the char - this may be worthwhile - but may cause other surprises + #It is perhaps best avoided at another level and try to make renderline do exactly as it's told + #the advantage of this 2w splitting method is that the inserted character ends up in exactly the column we expect. + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 0 ;#replace not insert + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] 1 ;#insert - same index + if {$prevcolinfo ne ""} { + #we've split the 2wide - it may already have been rendered as an exposed1 - but not for example if our startcolumn was current idx + priv::render_addchar [expr {$idx-1}] $opt_exposed1 [lindex $understacks $idx-1] [lindex $understacks_gx $idx-1] 0 ;#replace not insert + } ;# else?? + incr idx + } + if {$cursor_column < [llength $outcols] || $overflow_idx == -1} { + incr cursor_column + } + } elseif {$uwidth == 0} { + #what if this is some other c0/c1 control we haven't handled specifically? + + #by emitting a preceding empty-string column - we associate whatever this char is with the preceeding non-zero-length character and any existing zero-lengths that follow it + #e.g combining diacritic - increment before over char REVIEW + #arguably the previous overchar should have done this - ie lookahead for combiners? + #if we can get a proper grapheme_split function - this should be easier to tidy up. + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column 2 + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } elseif {$uwidth == 1} { + #includes null empty cells + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + priv::render_addchar $idx "" [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #if next column in underlay empty - we've overwritten first half of underlying 2wide grapheme + #replace with rhs exposure in case there are no more overlay graphemes coming - use underlay's stack + if {([llength $outcols] >= $idx +2) && [lindex $outcols $idx+1] eq ""} { + priv::render_addchar [expr {$idx+1}] $opt_exposed2 [lindex $understacks $idx+1] [lindex $understacks_gx $idx+1] $insert_mode + } + incr idx + } + if {($cursor_column < [llength $outcols]) || $overflow_idx == -1 || $test_mode} { + incr cursor_column + } + } elseif {$uwidth > 1} { + set owidth [grapheme_width_cached $ch] + if {$owidth == 1} { + #1wide over 2wide in underlay + if {!$insert_mode} { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + priv::render_addchar $idx $opt_exposed2 [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + #don't incr idx - we are just putting a broken-indication in the underlay - which may get overwritten by next overlay char + } else { + #insert mode just pushes all to right - no exposition char here + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + } + } else { + #2wide over 2wide + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx 2 + incr cursor_column 2 + } + + if {$cursor_column > [llength $outcols] && $overflow_idx != -1} { + set cursor_column [llength $outcols] + } + } + } else { + priv::render_addchar $idx $ch [lindex $overstacks $idx_over] [lindex $overstacks_gx $idx_over] $insert_mode + incr idx + incr cursor_column + if {$overflow_idx !=-1 && !$test_mode} { + #overflow + if {$cursor_column > [llength $outcols]} { + set cursor_column [llength $outcols] + } + } + } + } + } + } ;# end switch + + + } + other { + #todo - consider CSI s DECSLRM vs ansi.sys \x1b\[s - we need \x1b\[s for oldschool ansi art - but may have to enable only for that. + #we should probably therefore reverse this mapping so that x1b7 x1b8 are the primary codes for save/restore + set code [tcl::string::map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $item] + #since this element isn't a grapheme - advance idx_over to next grapheme overlay when about to fill 'unapplied' + + set matchinfo [list] + + #remap of DEC cursor_save/cursor_restore from ESC sequence to equivalent CSI + #probably not ideal - consider putting cursor_save/cursor_restore in functions so they can be called from the appropriate switch branch instead of using this mapping + #review - cost/benefit of function calls within these switch-arms instead of inline code? + + set c1 [tcl::string::index $code 0] + set c1c2c3 [tcl::string::range $code 0 2] + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + #tcl 8.7 - faster to use inline list than to store it in a local var outside of loop. + #(surprising - but presumably ) + set leadernorm [tcl::string::range [tcl::string::map [list\ + \x1b\[< 1006\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\] 7OSC\ + \x9d 8OSC\ + \x1b 7ESC\ + ] $c1c2c3] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + #we leave the tail of the code unmapped for now + switch -- $leadernorm { + 1006 { + #https://invisible-island.net/xterm/ctlseqs/ctlseqs.html + #SGR (1006) CSI < followed by colon separated encoded-button-value,px,py ordinates and final M for button press m for button release + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 3 end]] + } + 7CSI - 7OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 2 end]] + } + 7ESC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + 8CSI - 8OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + default { + #we haven't made a mapping for this + set codenorm $code + } + } + + #we've mapped 7 and 8bit escapes to values we can handle as literals in switch statements to take advantange of jump tables. + switch -- $leadernorm { + 1006 { + #TODO + # + switch -- [tcl::string::index $codenorm end] { + M { + puts stderr "mousedown $codenorm" + } + m { + puts stderr "mouseup $codenorm" + } + } + + } + {7CSI} - {8CSI} { + set param [tcl::string::range $codenorm 4 end-1] + #puts stdout "--> CSI [tcl::string::index $leadernorm 0] bit param:$param" + switch -- [tcl::string::index $codenorm end] { + D { + #Col move + #puts stdout "<-back" + #cursor back + #left-arrow/move-back when ltr mode + set num $param + if {$num eq ""} {set num 1} + + set version 2 + if {$version eq "2"} { + #todo - startcolumn offset! + if {$cursor_column - $num >= 1} { + incr idx -$num + incr cursor_column -$num + } else { + if {!$autowrap_mode} { + set cursor_column 1 + set idx 0 + } else { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + incr cursor_column -$num + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmovebackward + break + } + } + } else { + incr idx -$num + incr cursor_column -$num + if {$idx < $opt_colstart-1} { + #wrap to previous line and position cursor at end of data + set idx [expr {$opt_colstart-1}] + set cursor_column $opt_colstart + } + } + } + C { + #Col move + #puts stdout "->forward" + #todo - consider right-to-left cursor mode (e.g Hebrew).. some day. + #cursor forward + #right-arrow/move forward + set num $param + if {$num eq ""} {set num 1} + + #todo - retrict to moving 1 position past datalen? restrict to column width? + #should ideally wrap to next line when interactive and not on last row + #(some ansi art seems to expect this behaviour) + #This presumably depends on the terminal's wrap mode + #e.g DECAWM autowrap mode + # CSI ? 7 h - set: autowrap (also tput smam) + # CSI ? 7 l - reset: no autowrap (also tput rmam) + set version 2 + if {$version eq "2"} { + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$test_mode && $cursor_column == $max+1} { + #move_forward while in overflow + incr cursor_column -1 + } + + if {($cursor_column + $num) <= $max} { + incr idx $num + incr cursor_column $num + } else { + if {$autowrap_mode} { + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #jmn + if {$idx == $overflow_idx} { + incr num + } + + #horizontal movement beyond line extent needs to wrap - throw back to caller + #we may have both overflow_rightand unapplied data + #(can have overflow_right if we were in insert_mode and processed chars prior to this movement) + #leave row as is - caller will need to determine how many rows the column-movement has consumed + incr cursor_column $num ;#give our caller the necessary info as columns from start of row + #incr idx_over + #should be gci following last one applied + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction wrapmoveforward + break + } else { + set cursor_column $max + set idx [expr {$cursor_column -1}] + } + } + } else { + if {!$opt_overflow || ($cursor_column + $num) <= [llength $outcols+1]} { + incr idx $num + incr cursor_column $num + } else { + if {!$insert_mode} { + #block editing style with arrow keys + #overtype mode + set idxstart $idx + set idxend [llength $outcols] + set moveend [expr {$idxend - $idxstart}] + if {$moveend < 0} {set moveend 0} ;#sanity? + #puts "idxstart:$idxstart idxend:$idxend outcols[llength $outcols] undercols:[llength $undercols]" + incr idx $moveend + incr cursor_column $moveend + #if {[tcl::dict::exists $understacks $idx]} { + # set stackinfo [tcl::dict::get $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + #} else { + # set stackinfo [list] + #} + if {$idx < [llength $understacks]} { + set stackinfo [lindex $understacks $idx] ;#use understack at end - which may or may not have already been replaced by stack from overtext + } else { + set stackinfo [list] + } + if {$idx < [llength $understacks_gx]} { + #set gxstackinfo [tcl::dict::get $understacks_gx $idx] + set gxstackinfo [lindex $understacks_gx $idx] + } else { + set gxstackinfo [list] + } + #pad outcols + set movemore [expr {$num - $moveend}] + #assert movemore always at least 1 or we wouldn't be in this branch + for {set m 1} {$m <= $movemore} {incr m} { + incr idx + incr cursor_column + priv::render_addchar $idx " " $stackinfo $gxstackinfo $insert_mode + } + } else { + #normal - insert + incr idx $num + incr cursor_column $num + if {$idx > [llength $outcols]} { + set idx [llength $outcols];#allow one beyond - for adding character at end of line + set cursor_column [expr {[llength $outcols]+1}] + } + } + } + } + } + G { + #Col move + #move absolute column + #adjust to colstart - as column 1 is within overlay + #??? + set idx [expr {$param + $opt_colstart -1}] + set cursor_column $param + error "renderline absolute col move ESC G unimplemented" + } + A { + #Row move - up + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set num $param + if {$num eq ""} {set num 1} + incr cursor_row -$num + + if {$cursor_row < 1} { + set cursor_row 1 + } + + #ensure rest of *overlay* is emitted to remainder + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up + #retain cursor_column + break + } + B { + #Row move - down + set num $param + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move down + if {$num eq ""} {set num 1} + incr cursor_row $num + + + incr idx_over ;#idx_over hasn't encountered a grapheme and hasn't advanced yet + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + H - f { + #$re_both_move + lassign [split $param {;}] row col + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #lassign $matchinfo _match row col + + if {$col eq ""} {set col 1} + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$col > $max} { + set cursor_column $max + } else { + set cursor_column $col + } + set idx [expr {$cursor_column -1}] + + if {$row eq ""} {set row 1} + set cursor_row $row + if {$cursor_row < 1} { + set cursor_row 1 + } + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move + break + + } + X { + puts stderr "X - $param" + #ECH - erase character + if {$param eq "" || $param eq "0"} {set param 1}; #param=count of chars to erase + priv::render_erasechar $idx $param + #cursor position doesn't change. + } + r { + #$re_decstbm + #https://www.vt100.net/docs/vt510-rm/DECSTBM.html + #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins + lassign [split $param {;}] margin_top margin_bottom + + #todo - return these for the caller to process.. + puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + set cursor_column 1 + set cursor_row 1 + + incr idx_over + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction move ;#own instruction? decstbm? + break + } + s { + # - todo - make ansi.sys CSI s cursor save only apply for certain cases? - may need to support DECSLRM instead which uses same code + + #$re_cursor_save + #cursor save could come after last column + if {$overflow_idx != -1 && $idx == $overflow_idx} { + #bartman2.ans test file - fixes misalignment at bottom of dialog bubble + #incr cursor_row + #set cursor_column 1 + #bwings1.ans test file - breaks if we actually incr cursor (has repeated saves) + set cursor_saved_position [list row [expr {$cursor_row+1}] column 1] + } else { + set cursor_saved_position [list row $cursor_row column $cursor_column] + } + #there may be overlay stackable codes emitted that aren't in the understacks because they come between the last emmited character and the cursor_save control. + #we need the SGR and gx overlay codes prior to the cursor_save + + #a real terminal would not be able to know the state of the underlay.. so we should probably ignore it. + #set sgr_stack [lindex $understacks $idx] + #set gx_stack [lindex $understacks_gx $idx] ;#not actually a stack - just a boolean state (for now?) + + set sgr_stack [list] + set gx_stack [list] + + #we shouldn't need to scan for intermediate cursor save/restores - as restores would throw-back to the calling loop - so our overlay 'line' is since those. + #The overlay_grapheme_control_list had leading resets from previous lines - so we go back to the beginning not just the first grapheme. + + foreach gc [lrange $overlay_grapheme_control_list 0 $gci-1] { + lassign $gc type code + #types g other sgr gx0 + switch -- $type { + gx0 { + #code is actually a stand-in for the graphics on/off code - not the raw code + #It is either gx0_on or gx0_off + set gx_stack [list $code] + } + sgr { + #code is the raw code + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #jmn + set sgr_stack [list "\x1b\[m"] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set sgr_stack [list $code] + lappend overlay_grapheme_control_list [list sgr $code] + } elseif {[priv::is_sgr $code]} { + #often we don't get resets - and codes just pile up. + #as a first step to simplifying - at least remove earlier straight up dupes + set dup_posns [lsearch -all -exact $sgr_stack $code] ;#needs -exact - codes have square-brackets (glob chars) + set sgr_stack [lremove $sgr_stack {*}$dup_posns] + lappend sgr_stack $code + } + } + } + } + set cursor_saved_attributes "" + switch -- [lindex $gx_stack 0] { + gx0_on { + append cursor_saved_attributes "\x1b(0" + } + gx0_off { + append cursor_saved_attributes "\x1b(B" + } + } + #append cursor_saved_attributes [join $sgr_stack ""] + append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] + + #as there is apparently only one cursor storage element we don't need to throw back to the calling loop for a save. + + #don't incr index - or the save will cause cursor to move to the right + #carry on + + } + u { + #$re_cursor_restore + #we are going to jump somewhere.. for now we will assume another line, and process accordingly. + #The caller has the cursor_saved_position/cursor_saved_attributes if any (?review - if we always pass it back it, we could save some calls for moves in same line) + #don't set overflow at this point. The existing underlay to the right must be preserved. + #we only want to jump and render the unapplied at the new location. + + #lset overstacks $idx_over [list] + #set replay_codes_overlay "" + + #if {$cursor_saved_attributes ne ""} { + # set replay_codes_overlay $cursor_saved_attributes ;#empty - or last save if it happend in this input chunk + #} else { + #jj + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay "" + #} + + #like priv::render_unapplied - but without the overlay's ansi reset or gx stacks from before the restore code + incr idx_over + + set unapplied "" + set unapplied_list [list] + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + #incr idx_over + } + set unapplied [join $unapplied_list ""] + #if the save occured within this line - that's ok - it's in the return value list and caller can prepend for the next loop. + set instruction restore_cursor + break + } + ~ { + #$re_vt_sequence + #lassign $matchinfo _match key mod + lassign [split $param {;}] key mod + + #Note that f1 to f4 show as ESCOP|Q|R|S (VT220?) but f5+ show as ESC\[15~ + # + #e.g esc \[2~ insert esc \[2;2~ shift-insert + #mod - subtract 1, and then use bitmask + #shift = 1, (left)Alt = 2, control=4, meta=8 (meta seems to do nothing on many terminals on windows? Intercepted by windows?) + #puts stderr "vt key:$key mod:$mod code:[ansistring VIEW $code]" + if {$key eq "1"} { + #home + } elseif {$key eq "2"} { + #Insert + if {$mod eq ""} { + #no modifier key + set insert_mode [expr {!$insert_mode}] + #rather than set the cursor - we return the insert mode state so the caller can decide + } + } elseif {$key eq "3"} { + #Delete - presumably this shifts other chars in the line, with empty cells coming in from the end + switch -- $mod { + "" { + priv::render_delchar $idx + } + "5" { + #ctrl-del - delete to end of word (pwsh) - possibly word on next line if current line empty(?) + } + } + } elseif {$key eq "4"} { + #End + } elseif {$key eq "5"} { + #pgup + } elseif {$key eq "6"} { + #pgDn + } elseif {$key eq "7"} { + #Home + #?? + set idx [expr {$opt_colstart -1}] + set cursor_column 1 + } elseif {$key eq "8"} { + #End + } elseif {$key eq "11"} { + #F1 - or ESCOP or e.g shift F1 ESC\[1;2P + } elseif {$key eq "12"} { + #F2 - or ESCOQ + } elseif {$key eq "13"} { + #F3 - or ESCOR + } elseif {$key eq "14"} { + #F4 - or ESCOS + } elseif {$key eq "15"} { + #F5 or shift F5 ESC\[15;2~ + } elseif {$key eq "17"} { + #F6 + } elseif {$key eq "18"} { + #F7 + } elseif {$key eq "19"} { + #F8 + } elseif {$key eq "20"} { + #F9 + } elseif {$key eq "21"} { + #F10 + } elseif {$key eq "23"} { + #F11 + } elseif {$key eq "24"} { + #F12 + } + + } + h - l { + #we are matching only last char to get to this arm - but are there other sequences ending in h|l we need to handle? + + #$re_mode if first after CSI is "?" + #some docs mention ESC=h|l - not seen on windows terminals.. review + #e.g https://www2.math.upenn.edu/~kazdan/210/computer/ansi.html + if {[tcl::string::index $codenorm 4] eq "?"} { + set num [tcl::string::range $codenorm 5 end-1] ;#param between ? and h|l + #lassign $matchinfo _match num type + switch -- $num { + 5 { + #DECSNM - reverse video + #How we simulate this to render within a block of text is an open question. + #track all SGR stacks and constantly flip based on the current SGR reverse state? + #It is the job of the calling loop to do this - so at this stage we'll just set the states + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set reverse_mode 1 + } else { + #reset (disable) + set reverse_mode 0 + } + + } + 7 { + #DECAWM autowrap + if {$type eq "h"} { + #set (enable) + set autowrap_mode 1 + if {$opt_width ne "\uFFEF"} { + set overflow_idx $opt_width + } else { + #review - this is also the cursor position when adding a char at end of line? + set overflow_idx [expr {[llength $undercols]}] ;#index at which we would be *in* overflow a row move may still override it + } + #review - can idx ever be beyond overflow_idx limit when we change e.g with a width setting and cursor movements? presume not - but sanity check for now. + if {$idx >= $overflow_idx} { + puts stderr "renderline error - idx '$idx' >= overflow_idx '$overflow_idx' - unexpected" + } + } else { + #reset (disable) + set autowrap_mode 0 + set overflow_idx -1 + } + } + 25 { + if {$type eq "h"} { + #visible cursor + + } else { + #invisible cursor + + } + } + } + + } else { + puts stderr "overtype::renderline CSI...h|l code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + default { + puts stderr "overtype::renderline CSI code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + } + 7ESC { + #$re_other_single + switch -- [tcl::string::index $codenorm end] { + D { + #\x84 + #index (IND) + #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" + puts stderr "ESC D not fully implemented" + incr cursor_row + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction down + #retain cursor_column + break + } + M { + #\x8D + #Reverse Index (RI) + #vt102-docs: "Moves cursor up one line in same column. If cursor is at top margin, screen performs a scroll-down" + puts stderr "ESC M not fully implemented" + + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #move up + incr cursor_row -1 + if {$cursor_row < 1} { + set cursor_row 1 + } + #ensure rest of *overlay* is emitted to remainder + priv::render_unapplied $overlay_grapheme_control_list $gci + set instruction up ;#need instruction for scroll-down? + #retain cursor_column + break + } + E { + #\x85 + #review - is behaviour different to lf? + #todo - possibly(?) same logic as handling above. i.e return instruction depends on where column_cursor is at the time we get NEL + #leave implementation until logic for is set in stone... still under review + #It's arguable NEL is a pure cursor movement as opposed to the semantic meaning of crlf or lf in a file. + # + #Next Line (NEL) "Move the cursor to the left margin on the next line. If the cursor is at the bottom margin, scroll the page up" + puts stderr "ESC E unimplemented" + + } + default { + puts stderr "overtype::renderline ESC code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + } + } + + } + } + + #switch -regexp -matchvar matchinfo -- $code\ + #$re_mode { + #}\ + #default { + # puts stderr "overtype::renderline code [ansistring VIEW -lf 1 -vt 1 -nul 1 $code] not implemented" + #} + + } + default { + #don't need to handle sgr or gx0 types + #we have our sgr gx0 codes already in stacks for each overlay grapheme + } + } + } + + #-------- + if {$opt_overflow == 0} { + #need to truncate to the width of the original undertext + #review - string_width vs printing_length here. undertext requirement to be already rendered therefore punk::char::string_width ok? + #set num_under_columns [punk::char::string_width $pt_underchars] ;#plaintext underchars + } + if {$overflow_idx == -1} { + #overflow was initially unlimited and hasn't been overridden + } else { + + } + #-------- + + + #coalesce and replay codestacks for outcols grapheme list + set outstring "" ;#output prior to overflow + set overflow_right "" ;#remainder after overflow point reached + set i 0 + set cstack [list] + set prevstack [list] + set prev_g0 [list] + #note overflow_idx may already have been set lower if we had a row move above due to \v or ANSI moves + set in_overflow 0 ;#used to stop char-width scanning once in overflow + if {$overflow_idx == 0} { + #how does caller avoid an infinite loop if they have autowrap on and keep throwing graphemes to the next line? REVIEW + set in_overflow 1 + } + foreach ch $outcols { + #puts "---- [ansistring VIEW $ch]" + + set gxleader "" + if {$i < [llength $understacks_gx]} { + #set g0 [tcl::dict::get $understacks_gx $i] + set g0 [lindex $understacks_gx $i] + if {$g0 ne $prev_g0} { + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } else { + set gxleader "\x1b(B" + } + } + set prev_g0 $g0 + } else { + set prev_g0 [list] + } + + set sgrleader "" + if {$i < [llength $understacks]} { + #set cstack [tcl::dict::get $understacks $i] + set cstack [lindex $understacks $i] + if {$cstack ne $prevstack} { + if {[llength $prevstack] && ![llength $cstack]} { + #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. But why is cstack empty? + append sgrleader \033\[m + } else { + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + } + } + set prevstack $cstack + } else { + set prevstack [list] + } + + + + if {$in_overflow} { + if {$i == $overflow_idx} { + set 0 [lindex $understacks_gx $i] + set gxleader "" + if {$g0 eq [list "gx0_on"]} { + set gxleader "\x1b(0" + } elseif {$g0 eq [list "gx0_off"]} { + set gxleader "\x1b(B" + } + append overflow_right $gxleader + set cstack [lindex $understacks $i] + set sgrleader "" + #whether cstack is same or differs from previous char's stack - we must have an output at the start of the overflow_right + #if {[llength $prevstack] && ![llength $cstack]} { + # append sgrleader \033\[m + #} + append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + append overflow_right $sgrleader + append overflow_right $ch + } else { + append overflow_right $gxleader + append overflow_right $sgrleader + append overflow_right $ch + } + } else { + if {$overflow_idx != -1 && $i+1 == $overflow_idx} { + #one before overflow + #will be in overflow in next iteration + set in_overflow 1 + if {[grapheme_width_cached $ch]> 1} { + #we overflowed with second-half of a double-width char - replace first-half with user-supplied exposition char (should be 1 wide) + set ch $opt_exposed1 + } + } + append outstring $gxleader + append outstring $sgrleader + if {$idx+1 < $cursor_column} { + append outstring [tcl::string::map {\u0000 " "} $ch] + } else { + append outstring $ch + } + } + incr i + } + #flower.ans good test for null handling - reverse line building + if {![ansistring length $overflow_right]} { + set outstring [tcl::string::trimright $outstring "\u0000"] + } + set outstring [tcl::string::map {\u0000 " "} $outstring] + set overflow_right [tcl::string::trimright $overflow_right "\u0000"] + set overflow_right [tcl::string::map {\u0000 " "} $overflow_right] + + set replay_codes "" + if {[llength $understacks] > 0} { + if {$overflow_idx == -1} { + #set tail_idx [tcl::dict::size $understacks] + set tail_idx [llength $understacks] + } else { + set tail_idx [llength $undercols] + } + if {$tail_idx-1 < [llength $understacks]} { + #set replay_codes [join [lindex $understacks $tail_idx-1] ""] ;#tail replay codes + set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes + } + if {$tail_idx-1 < [llength $understacks_gx]} { + set gx0 [lindex $understacks_gx $tail_idx-1] + if {$gx0 eq [list "gx0_on"]} { + #if it was on, turn gx0 off at the point we stop processing overlay + append outstring "\x1b(B" + } + } + } + if {[string length $overflow_right]} { + #puts stderr "remainder:$overflow_right" + } + #pdict $understacks + + if {[punk::ansi::ta::detect_sgr $outstring]} { + append outstring [punk::ansi::a] ;#without this - we would get for example, trailing backgrounds after rightmost column + + #close off any open gx? + #probably should - and overflow_right reopen? + } + + if {$opt_returnextra} { + #replay_codes is the codestack at the boundary - used for ellipsis colouring to match elided text - review + #replay_codes_underlay is the set of codes in effect at the very end of the original underlay + + #review + #replay_codes_overlay is the set of codes in effect at the very end of the original overlay (even if not all overlay was applied) + #todo - replay_codes for gx0 mode + + #overflow_idx may change during ansi & character processing + if {$overflow_idx == -1} { + set overflow_right_column "" + } else { + set overflow_right_column [expr {$overflow_idx+1}] + } + set result [tcl::dict::create\ + result $outstring\ + visualwidth [punk::ansi::printing_length $outstring]\ + instruction $instruction\ + stringlen [string length $outstring]\ + overflow_right_column $overflow_right_column\ + overflow_right $overflow_right\ + unapplied $unapplied\ + unapplied_list $unapplied_list\ + insert_mode $insert_mode\ + autowrap_mode $autowrap_mode\ + insert_lines_above $insert_lines_above\ + insert_lines_below $insert_lines_below\ + cursor_saved_position $cursor_saved_position\ + cursor_saved_attributes $cursor_saved_attributes\ + cursor_column $cursor_column\ + cursor_row $cursor_row\ + opt_overflow $opt_overflow\ + replay_codes $replay_codes\ + replay_codes_underlay $replay_codes_underlay\ + replay_codes_overlay $replay_codes_overlay\ + ] + if {$opt_returnextra == 1} { + return $result + } else { + #human/debug - map special chars to visual glyphs + set viewop VIEW + switch -- $opt_returnextra { + 2 { + #codes and character data + set viewop VIEWCODES ;#ansi colorisation of codes - green for SGR, blue/blue reverse for cursor_save/cursor_restore, cyan for movements, orange for others + } + 3 { + set viewop VIEWSTYLE ;#ansi colorise the characters within the output with preceding codes, stacking codes only within each dict value - may not be same SGR effect as the effect in-situ. + } + } + tcl::dict::set result result [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result result]] + tcl::dict::set result overflow_right [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result overflow_right]] + tcl::dict::set result unapplied [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied]] + tcl::dict::set result unapplied_list [ansistring VIEW -lf 1 -vt 1 [tcl::dict::get $result unapplied_list]] + tcl::dict::set result replay_codes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes]] + tcl::dict::set result replay_codes_underlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_underlay]] + tcl::dict::set result replay_codes_overlay [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result replay_codes_overlay]] + tcl::dict::set result cursor_saved_attributes [ansistring $viewop -lf 1 -vt 1 [tcl::dict::get $result cursor_saved_attributes]] + return $result + } + } else { + return $outstring + } + #return [join $out ""] + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace overtype ---}] +} + +tcl::namespace::eval overtype::piper { + proc overcentre {args} { + if {[llength $args] < 2} { + error {usage: ?-bias left|right? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::centre {*}$argsflags $under $over + } + proc overleft {args} { + if {[llength $args] < 2} { + error {usage: ?-startcolumn ? ?-transparent [0|1|]? ?-exposed1 ? ?-exposed2 ? ?-overflow [1|0]? overtext pipelinedata} + } + lassign [lrange $args end-1 end] over under + set argsflags [lrange $args 0 end-2] + tailcall overtype::left {*}$argsflags $under $over + } +} + + +# -- --- --- --- --- --- --- --- --- --- --- +proc overtype::transparentline {args} { + foreach {under over} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + set defaults [tcl::dict::create\ + -transparent 1\ + -exposed 1 " "\ + -exposed 2 " "\ + ] + set newargs [tcl::dict::merge $defaults $argsflags] + tailcall overtype::renderline {*}$newargs $under $over +} +#renderline may not make sense as it is in the long run for blocks of text - but is handy in the single-line-handling form anyway. +# We are trying to handle ansi codes in a block of text which is acting like a mini-terminal in some sense. +#We can process standard cursor moves such as \b \r - but no way to respond to other cursor movements e.g moving to other lines. +# +tcl::namespace::eval overtype::piper { + proc renderline {args} { + if {[llength $args] < 2} { + error {usage: ?-start ? ?-transparent [0|1|]? ?-overflow [1|0]? overtext pipelinedata} + } + foreach {over under} [lrange $args end-1 end] break + set argsflags [lrange $args 0 end-2] + tailcall overtype::renderline {*}$argsflags $under $over + } +} +interp alias "" piper_renderline "" overtype::piper::renderline + +#intended for single grapheme - but will work for multiple +#cannot contain ansi or newlines +#(a cache of ansifreestring_width calls - as these are quite regex heavy) +proc overtype::grapheme_width_cached {ch} { + variable grapheme_widths + if {[tcl::dict::exists $grapheme_widths $ch]} { + return [tcl::dict::get $grapheme_widths $ch] + } + set width [punk::char::ansifreestring_width $ch] + tcl::dict::set grapheme_widths $ch $width + return $width +} + + + +proc overtype::test_renderline {} { + set t \uFF5E ;#2-wide tilde + set u \uFF3F ;#2-wide underscore + set missing \uFFFD + return [list $t $u A${t}B] +} + +#maintenance warning +#same as textblock::size - but we don't want that circular dependency +#block width and height can be tricky. e.g \v handled differently on different terminal emulators and can affect both +proc overtype::blocksize {textblock} { + if {$textblock eq ""} { + return [tcl::dict::create width 0 height 1] ;#no such thing as zero-height block - for consistency with non-empty strings having no line-endings + } + if {[tcl::string::first \t $textblock] >= 0} { + if {[info exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set textblock [textutil::tabify::untabify2 $textblock $tw] + } + #ansistrip on entire block in one go rather than line by line - result should be the same - review - make tests + if {[punk::ansi::ta::detect $textblock]} { + set textblock [punk::ansi::ansistrip $textblock] + } + if {[tcl::string::last \n $textblock] >= 0} { + set num_le [expr {[tcl::string::length $textblock]-[tcl::string::length [tcl::string::map {\n {}} $textblock]]}] ;#faster than splitting into single-char list + set width [tcl::mathfunc::max {*}[lmap v [split $textblock \n] {::punk::char::ansifreestring_width $v}]] + } else { + set num_le 0 + set width [punk::char::ansifreestring_width $textblock] + } + #our concept of block-height is likely to be different to other line-counting mechanisms + set height [expr {$num_le + 1}] ;# one line if no le - 2 if there is one trailing le even if no data follows le + + return [tcl::dict::create width $width height $height] ;#maintain order in 'image processing' standard width then height - caller may use lassign [dict values [blocksize ]] width height +} + +tcl::namespace::eval overtype::priv { + variable cache_is_sgr [tcl::dict::create] + + #we are likely to be asking the same question of the same ansi codes repeatedly + #caching the answer saves some regex expense - possibly a few uS to lookup vs under 1uS + #todo - test if still worthwhile after a large cache is built up. (limit cache size?) + proc is_sgr {code} { + variable cache_is_sgr + if {[tcl::dict::exists $cache_is_sgr $code]} { + return [tcl::dict::get $cache_is_sgr $code] + } + set answer [punk::ansi::codetype::is_sgr $code] + tcl::dict::set cache_is_sgr $code $answer + return $answer + } + proc render_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar unapplied_list unapplied_list ;#maintaining as a list allows caller to utilize it without having to re-split + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + set unapplied_list [list] + #append unapplied [join [lindex $overstacks $idx_over] ""] + #append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $overstacks $idx_over]] + set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + if {$sgr_merged ne ""} { + lappend unapplied_list $sgr_merged + } + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + lappend unapplied_list "\x1b(0" + } + "gx0_off" { + lappend unapplied_list "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci+1 end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + } + set unapplied [join $unapplied_list ""] + } + + #clearer - renders the specific gci forward as unapplied - prefixed with it's merged sgr stack + proc render_this_unapplied {overlay_grapheme_control_list gci} { + upvar idx_over idx_over + upvar unapplied unapplied + upvar unapplied_list unapplied_list + upvar overstacks overstacks + upvar overstacks_gx overstacks_gx + upvar overlay_grapheme_control_stacks og_stacks + + #set unapplied [join [lrange $overlay_grapheme_control_list $gci+1 end]] + set unapplied "" + set unapplied_list [list] + + set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + if {$sgr_merged ne ""} { + lappend unapplied_list $sgr_merged + } + switch -- [lindex $overstacks_gx $idx_over] { + "gx0_on" { + lappend unapplied_list "\x1b(0" + } + "gx0_off" { + lappend unapplied_list "\x1b(B" + } + } + + foreach gc [lrange $overlay_grapheme_control_list $gci end] { + lassign $gc type item + #types g other sgr gx0 + if {$type eq "gx0"} { + if {$item eq "gx0_on"} { + lappend unapplied_list "\x1b(0" + } elseif {$item eq "gx0_off"} { + lappend unapplied_list "\x1b(B" + } + } else { + lappend unapplied_list $item + } + } + set unapplied [join $unapplied_list ""] + } + proc render_delchar {i} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + set nxt [llength $o] + if {$i < $nxt} { + set o [lreplace $o $i $i] + set ustacks [lreplace $ustacks $i $i] + set gxstacks [lreplace $gxstacks $i $i] + } else { + + } + } + proc render_erasechar {i count} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + #ECH clears character attributes from erased character positions + #ECH accepts 0 or empty parameter, which is equivalent to 1. Caller should do that mapping and only supply 1 or greater. + if {![tcl::string::is integer -strict $count] || $count < 1} { + error "render_erasechar count must be integer >= 1" + } + set start $i + set end [expr {$i + $count -1}] + #we restrict ECH to current line - as some terminals do - review - is that the only way it's implemented? + if {$i > [llength $o]-1} { + return + } + if {$end > [llength $o]-1} { + set end [expr {[llength $o]-1}] + } + set num [expr {$end - $start + 1}] + set o [lreplace $o $start $end {*}[lrepeat $num \u0000]] ;#or space? + set ustacks [lreplace $ustacks $start $end {*}[lrepeat $num [list]]] + set gxstacks [lreplace $gxstacks $start $end {*}[lrepeat $num [list]]] + return + } + proc render_setchar {i c } { + upvar outcols o + lset o $i $c + } + #is actually addgrapheme? + proc render_addchar {i c sgrstack gx0stack {insert_mode 0}} { + upvar outcols o + upvar understacks ustacks + upvar understacks_gx gxstacks + + if 0 { + if {$c eq "c"} { + puts "i:$i c:$c sgrstack:[ansistring VIEW $sgrstack]" + puts "understacks:[ansistring VIEW $ustacks]" + upvar overstacks overstacks + puts "overstacks:[ansistring VIEW $overstacks]" + puts "info level 0:[info level 0]" + } + } + + set nxt [llength $o] + if {!$insert_mode} { + if {$i < $nxt} { + #These lists must always be in sync + lset o $i $c + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + lset ustacks $i $sgrstack + lset gxstacks $i $gx0stack + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } else { + #insert of single-width vs double-width when underlying is double-width? + if {$i < $nxt} { + set o [linsert $o $i $c] + } else { + lappend o $c + } + if {$i < [llength $ustacks]} { + set ustacks [linsert $ustacks $i $sgrstack] + set gxstacks [linsert $gxstacks $i $gx0stack] + } else { + lappend ustacks $sgrstack + lappend gxstacks $gx0stack + } + } + } + +} + + + +# -- --- --- --- --- --- --- --- --- --- --- +tcl::namespace::eval overtype { + interp alias {} ::overtype::center {} ::overtype::centre +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide overtype [tcl::namespace::eval overtype { + variable version + set version 1.6.5 +}] +return + +#*** !doctools +#[manpage_end] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm new file mode 100644 index 00000000..85cb9f27 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -0,0 +1,6521 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::ansi 0.1.1 +# Meta platform tcl +# Meta license +# @@ Meta End + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::ansi 0 0.1.1] +#[copyright "2023"] +#[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] +#[require punk::ansi] +#[keywords module ansi terminal console string] +#[description] +#[para]Ansi based terminal control string functions +#[para]See [package punk::ansi::console] for related functions for controlling a console + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::ansi +#[para]punk::ansi functions return their values - no implicit emission to console/stdout +#[subsection Concepts] +#[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner +#[para]There are many differences in terminal implementations - but most should support a core set of features +#[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. +#[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::ansi +#[list_begin itemized] + +package require Tcl 8.6- +package require punk::char +package require punk::assertion +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {punk::char}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + +tcl::namespace::eval punk::ansi::class { + if {![llength [tcl::info::commands class_ansi]]} { + + oo::class create class_ansi { + variable o_ansistringobj + + variable o_render_dimensions ;#last dimensions at which we rendered + variable o_rendered + variable o_rendered_what + constructor {ansitext {dimensions 80x25}} { + if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { + error "class_ansi::render dimensions must be of the form x" + } + + #a straight string compare may be faster.. but a checksum is much smaller in memory, so we'll use that by default. + set o_rendered_what "" + #There may also be advantages to renering to a class_ansistring class object + + set o_render_dimensions $dimensions + set o_ansistringobj [ansistring NEW $ansitext] + } + method get {} { + return [$o_ansistringobj get] + } + method render {{dimensions ""}} { + if {$dimensions eq ""} { + set dimensions $o_render_dimensions + } + if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { + error "class_ansi::render dimensions must be of the form x" + } + set cksum "not-done" + if {$dimensions ne $o_render_dimensions || $o_rendered_what ne [set cksum [$o_ansistringobj checksum]]} { + #some ansi layout/art relies on wrapping at the width-dimension to display properly + #this includes cursor movements ie right arrow can move cursor to columns in lines below + #overflow is a different concept - perhaps not particularly congruent with the idea of the textblock as a mini terminal emulator. + #overflow effectively auto-expands the block(terminal?) width + #overflow and wrap both being true won't make sense unless we implement a max_overflow concept + set o_rendered [overtype::renderspace -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] + if {$cksum eq "not-done"} { + #if dimensions changed - the checksum won't have been done + set o_rendered_what [$o_ansistringobj checksum] + } else { + set o_rendered_what $cksum + } + set o_render_dimensions $dimensions + } + + #todo - store rendered and allow partial rendering of new data lines? + return $o_rendered + } + method rendertest {{dimensions ""}} { + if {$dimensions eq ""} { + set dimensions $o_render_dimensions + } + if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { + error "class_ansi::render dimensions must be of the form x" + } + set o_dimensions $dimensions + + + set rendered [overtype::renderspace -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] + return $rendered + } + method render_to_input_line {args} { + if {[llength $args] < 1} { + puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" + } + set x [lindex $args end] + set arglist [lrange $args 0 end-1] + if {[llength $arglist] %2 != 0} { + puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" + } + set opts [tcl::dict::create\ + -dimensions 80x24\ + -minus 0\ + ] + foreach {k v} $arglist { + switch -- $k { + -dimensions - -minus { + tcl::dict::set opts $k $v + } + default { + puts stderr "render_to_input_line unexpected argument '$k' usage: ?-dimensions WxH? ?-minus charcount? x" + } + } + } + set opt_dimensions [tcl::dict::get $opts -dimensions] + set opt_minus [tcl::dict::get $opts -minus] + lassign [split $opt_dimensions x] w h + if {![tcl::string::is integer -strict $w] || ![tcl::string::is integer -strict $h] || $w < 1 || $h < 1} { + puts stderr "render_to_input_line WxH width & height must be positive integer values usage: ?-dimensions WxH? ?-minus charcount? x" + } + if {![tcl::string::is integer -strict $opt_minus]} { + puts stderr "render_to_input_line -minus must be positive integer value representing number of chars to exclude from end. usage: ?-dimensions WxH? ?-minus charcount? x" + } + + package require textblock + set lfvis [ansistring VIEW -lf 1 \n] + set maplf [list \n "[a+ green bold reverse]${lfvis}[a]\n"] ;#a mapping to highlight newlines + + set lines [split [$o_ansistringobj get] \n] + set rlines [lrange $lines 0 $x] + set chunk [::join $rlines \n] + append chunk \n + if {$opt_minus ne "0"} { + set chunk [tcl::string::range $chunk 0 end-$opt_minus] + } + set rendered [overtype::renderspace -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" $chunk] + set marker "" + for {set i 1} {$i <= $w} {incr i} { + if {$i % 10 == 0} { + ::append marker "|" + } elseif {$i % 5 == 0} { + ::append marker * + } else { + ::append marker "." + } + } + ::append rendered \n $marker + set xline [lindex $rlines $x]\n + set xlinev [ansistring VIEWSTYLE $xline] + set xlinev [tcl::string::map $maplf $xlinev] + set xlinedisplay [overtype::renderspace -wrap 1 -width $w -height 1 "" $xlinev] + ::append rendered \n $xlinedisplay + + set chunk [ansistring VIEWSTYLE $chunk] + set chunk [tcl::string::map $maplf $chunk] + #keep chunkdisplay narrower - leave at 80 or it will get unwieldy for larger image widths + set chunkdisplay [overtype::renderspace -wrap 1 -width 80 -height 1 "" $chunk] + set renderheight [llength [split $rendered \n]] + set chunkdisplay_lines [split $chunkdisplay \n] + set chunkdisplay_tail [lrange $chunkdisplay_lines end-$renderheight end] + set chunkdisplay_block [join $chunkdisplay_tail \n] + #the input chunk lines are often much longer than the output.. resulting in main content being way up the screen. It's often impractical to view more than the tail of the chunkdisplay. + textblock::join -- $rendered $chunkdisplay_block + } + + method checksum {} { + return [$o_ansistringobj checksum] + } + method checksum_last_rendered_input {} { + return $o_rendered_what + } + #todo - fix class_ansistring so the ansistring methods can be called directly + method viewlines {} { + return [ansistring VIEW [$o_ansistringobj get]] + } + method viewcodes {args} { + set defaults [list\ + -lf 0\ + -vt 0\ + -width "auto"\ + ] + set opts $defaults + foreach {k v} $args { + switch -- $k { + -lf - -vt - -width { + tcl::dict::set opts $k $v + } + default { + error "viewcodes unrecognised option '$k'. Known options [tcl::dict::keys $defaults]" + } + } + } + set opts_lf [tcl::dict::get $opts -lf] + set opts_vt [tcl::dict::get $opts -vt] + set opts_width [tcl::dict::get $opts -width] + if {$opts_width eq ""} { + return [ansistring VIEWCODES -lf $opts_lf -vt $opts_vt [$o_ansistringobj get]] + } elseif {$opts_width eq "auto"} { + lassign [punk::console::get_size] _cols columns _rows rows + set displaycols [expr {$columns -4}] ;#review + return [overtype::renderspace -width $displaycols -wrap 1 "" [ansistring VIEWCODES -lf $opts_lf -vt $opts_vt [$o_ansistringobj get]]] + } elseif {[tcl::string::is integer -strict $opts_width] && $opts_width > 0} { + return [overtype::renderspace -width $opts_width -wrap 1 "" [ansistring VIEWCODES -lf $opts_lf -vt $opts_vt [$o_ansistringobj get]]] + } else { + error "viewcodes unrecognised value for -width. Try auto or a positive integer" + } + } + method viewchars {args} { + set defaults [list\ + -width "auto"\ + ] + set opts $defaults + foreach {k v} $args { + switch -- $k { + -width { + tcl::dict::set opts $k $v + } + default { + error "viewchars unrecognised option '$k'. Known options [tcl::dict::keys $defaults]" + } + } + } + set opts_width [tcl::dict::get $opts -width] + if {$opts_width eq ""} { + return [punk::ansi::ansistripraw [$o_ansistringobj get]] + } elseif {$opts_width eq "auto"} { + lassign [punk::console::get_size] _cols columns _rows rows + set displaycols [expr {$columns -4}] ;#review + return [overtype::renderspace -width $displaycols -wrap 1 "" [punk::ansi::ansistripraw [$o_ansistringobj get]]] + } elseif {[tcl::string::is integer -strict $opts_width] && $opts_width > 0} { + return [overtype::renderspace -width $opts_width -wrap 1 "" [punk::ansi::ansistripraw [$o_ansistringobj get]]] + } else { + error "viewchars unrecognised value for -width. Try auto or a positive integer" + } + } + method viewstyle {args} { + set defaults [list\ + -width "auto"\ + ] + set opts $defaults + foreach {k v} $args { + switch -- $k { + -width { + tcl::dict::set opts $k $v + } + default { + error "viewstyle unrecognised option '$k'. Known options [tcl::dict::keys $defaults]" + } + } + } + set opts_width [tcl::dict::get $opts -width] + if {$opts_width eq ""} { + return [ansistring VIEWSTYLE [$o_ansistringobj get]] + } elseif {$opts_width eq "auto"} { + lassign [punk::console::get_size] _cols columns _rows rows + set displaycols [expr {$columns -4}] ;#review + return [overtype::renderspace -width $displaycols -wrap 1 "" [ansistring VIEWSTYLE [$o_ansistringobj get]]] + } elseif {[tcl::string::is integer -strict $opts_width] && $opts_width > 0} { + return [overtype::renderspace -width $opts_width -wrap 1 "" [ansistring VIEWSTYLE [$o_ansistringobj get]]] + } else { + error "viewstyle unrecognised value for -width. Try auto or a positive integer" + } + } + method append_noreturn {ansistring} { + $o_ansistringobj append $ansistring + #don't return the raw data - it may be big and probably won't play nicely with default terminal dimensions etc. + return + } + #like Tcl append - returns the result + #Tcl's append changes a variable state, this changes the object state + method append {ansistring} { + $o_ansistringobj append $ansistring + } + + } + } +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::ansi { + #*** !doctools + #[subsection {Namespace punk::ansi}] + #[para] Core API functions for punk::ansi + #[list_begin definitions] + + #old-school ansi graphics - C0 control glyphs. + variable cp437_map + #for cp437 images we need to map these *after* splitting ansi, to single-width unicode chars + #It would also probably be problematic to map \u000A to the glyph - as this is the newline - it included in the map anyway for completeness. The caller may have to manually carve that or other specific c0 controls out of the map to use it depending on the situation(?) + #Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs + #most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too + #by mapping these we can display regardless. + #nul char - no cp437 image but commonly used as space in ansi graphics. + #(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW + tcl::dict::set cp437_map \u0000 " " ;#space + tcl::dict::set cp437_map \u0001 \u263A ;#smiley + tcl::dict::set cp437_map \u0002 \u263B ;#smiley-filled + tcl::dict::set cp437_map \u0003 \u2665 ;#heart + tcl::dict::set cp437_map \u0004 \u2666 ;#diamond + tcl::dict::set cp437_map \u0005 \u2663 ;#club + tcl::dict::set cp437_map \u0006 \u2660 ;#spade + tcl::dict::set cp437_map \u0007 \u2022 ;#dot + tcl::dict::set cp437_map \u0008 \u25D8 ;#square hollow dot + tcl::dict::set cp437_map \u0009 \u25CB ;#hollow dot + tcl::dict::set cp437_map \u000A \u25D9 ;#square and dot (\n) + tcl::dict::set cp437_map \u000B \u2642 ;#male + tcl::dict::set cp437_map \u000C \u2640 ;#female + tcl::dict::set cp437_map \u000D \u266A ;#note1 (\r) + tcl::dict::set cp437_map \u000E \u266B ;#note2 + tcl::dict::set cp437_map \u000F \u263C ;#sun + tcl::dict::set cp437_map \u0010 \u25BA ;#right arrow triangle + tcl::dict::set cp437_map \u0011 \u25CA ;#left arrow triangle + tcl::dict::set cp437_map \u0012 \u2195 ;#updown arrow + tcl::dict::set cp437_map \u0013 \u203C ;#double bang + tcl::dict::set cp437_map \u0014 \u00B6 ;#pilcrow (paragraph mark / blind P) + tcl::dict::set cp437_map \u0015 \u00A7 ;#Section Sign + tcl::dict::set cp437_map \u0016 \u25AC ;#Heavy horizontal? + tcl::dict::set cp437_map \u0017 \u21A8 ;#updown arrow 2 ? + tcl::dict::set cp437_map \u0018 \u2191 ;#up arrow + tcl::dict::set cp437_map \u0019 \u2193 ;#down arrow + tcl::dict::set cp437_map \u001A \u2192 ;#right arrow + tcl::dict::set cp437_map \u001B \u2190 ;#left arrow + tcl::dict::set cp437_map \u001C \u221F ;#bottom left corner + tcl::dict::set cp437_map \u001D \u2194 ;#left-right arrow + tcl::dict::set cp437_map \u001E \u25B2 ;#up arrow triangle + tcl::dict::set cp437_map \u001F \u25BC ;#down arrow triangle + + variable map_special_graphics + #DEC Special Graphics set https://en.wikipedia.org/wiki/DEC_Special_Graphics + #AKA IBM Code page 1090 + tcl::dict::set map_special_graphics _ \u00a0 ;#no-break space + tcl::dict::set map_special_graphics "`" \u25c6 ;#black diamond + tcl::dict::set map_special_graphics a \u2592 ;#shaded block (checkerboard stipple), medium shade - Block Elements + tcl::dict::set map_special_graphics b \u2409 ;#symbol for HT + tcl::dict::set map_special_graphics c \u240c ;#symbol for FF + tcl::dict::set map_special_graphics d \u240d ;#symbol for CR + tcl::dict::set map_special_graphics e \u240a ;#symbol for LF + tcl::dict::set map_special_graphics f \u00b0 ;#degree sign + tcl::dict::set map_special_graphics g \u00b1 ;#plus-minus sign + tcl::dict::set map_special_graphics h \u2424 ;#symbol for NL + tcl::dict::set map_special_graphics i \u240b ;#symbol for VT + tcl::dict::set map_special_graphics j \u2518 ;#brc, light up and left - box drawing + tcl::dict::set map_special_graphics k \u2510 ;#trc, light down and left - box drawing + tcl::dict::set map_special_graphics l \u250c ;#tlc, light down and right - box drawing + tcl::dict::set map_special_graphics m \u2514 ;#blc, light up and right - box drawing + tcl::dict::set map_special_graphics n \u253c ;#light vertical and horizontal - box drawing + tcl::dict::set map_special_graphics o \u23ba ;#horizontal scan line-1 + tcl::dict::set map_special_graphics p \u23bb ;#horizontal scan line-3 + tcl::dict::set map_special_graphics q \u2500 ;#light horizontal - box drawing + tcl::dict::set map_special_graphics r \u23bc ;#horizontal scan line-7 + tcl::dict::set map_special_graphics s \u23bd ;#horizontal scan line-9 + tcl::dict::set map_special_graphics t \u251c ;#light vertical and right - box drawing + tcl::dict::set map_special_graphics u \u2524 ;#light vertical and left - box drawing + tcl::dict::set map_special_graphics v \u2534 ;#light up and horizontal - box drawing + tcl::dict::set map_special_graphics w \u252c ;#light down and horizontal - box drawing + tcl::dict::set map_special_graphics x \u2502 ;#light vertical - box drawing + tcl::dict::set map_special_graphics y \u2264 ;#less than or equal + tcl::dict::set map_special_graphics z \u2265 ;#greater than or equal + tcl::dict::set map_special_graphics "\{" \u03c0 ;#greek small letter pi + tcl::dict::set map_special_graphics "|" \u2260 ;#not equal to + tcl::dict::set map_special_graphics "\}" \u00a3 ;#pound sign + tcl::dict::set map_special_graphics ~ \u00b7 ;#middle dot + + #see also ansicolour page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control + + variable test "blah\033\[1;33mETC\033\[0;mOK" + + #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. + tcl::namespace::export\ + {a?} {a+} a \ + ansistring\ + convert*\ + clear*\ + cursor_*\ + detect*\ + get_*\ + move*\ + reset*\ + ansistrip*\ + test_decaln\ + titleset\ + + + variable escape_terminals + #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). + tcl::dict::set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] + #tcl::dict::set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic + tcl::dict::set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals + tcl::dict::set escape_terminals DCS [list \007 \033\\ \u009c] + tcl::dict::set escape_terminals MISC [list \007 \033\\ \u009c] + #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) + #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? + + #review - there doesn't seem to be an \x1b#7 + # https://espterm.github.io/docs/VT100%20escape%20codes.html + + #self-contained 2 byte ansi escape sequences - review more? + set ansi_2byte_codes_dict [tcl::dict::create\ + "reset_terminal" "\u001bc"\ + "save_cursor_posn" "\u001b7"\ + "restore_cursor_posn" "\u001b8"\ + "cursor_up_one" "\u001bM"\ + "NEL - Next Line" "\u001bE"\ + "IND - Down one line" "\u001bD"\ + "HTS - Set Tab Stop" "\u001bH"\ + "DECPAM app keypad" "\x1b="\ + "DECPNM norm keypad" "\x1b>"\ + ] + + + # -------------------------------------- + #comparitive test (performance) string-append vs 2-object (with existing splits) append + proc test_cat1 {ansi1 ansi2} { + #make sure objects have splits + set s1 [ansistring NEW $ansi1] + tcl::namespace::eval [info object namespace $s1] {my MakeSplit} + set s2 [ansistring NEW $ansi2] + tcl::namespace::eval [info object namespace $s2] {my MakeSplit} + + #operation under test + # -- + #standard string append + $s1 append $ansi2 + # -- + $s2 destroy + + #$s1 append \033\[31mX ;#redX + return $s1 + } + proc test_cat2 {ansi1 ansi2} { + #make sure objects have splits + set s1 [ansistring NEW $ansi1] + tcl::namespace::eval [info object namespace $s1] {my MakeSplit} + set s2 [ansistring NEW $ansi2] + tcl::namespace::eval [info object namespace $s2] {my MakeSplit} + + #operation under test + # -- + #ansistring object append + $s1 appendobj $s2 + # -- + $s2 destroy + #$s1 append \033\[31mX ;#redX + return $s1 + } + # -------------------------------------- + + + #review - We have file possibly encoded directly in another codepage such as 437 - or utf8,utf16 etc, but then still needing post conversion to e.g cp437? + #In testing old ansi graphics files available on the web, some files need encoding {utf-8 cp437} some just cp437 + proc readfile {fname {encoding cp437}} { + #todo + #1- look for BOM - read according to format given by BOM + #2- assume utf-8 + #3- if errors - assume cp437? + + if {[llength $encoding] == 1} { + set ansidata [fcat -encoding $encoding $fname] + set obj [punk::ansi::class::class_ansi new $ansidata] + } elseif {[llength $encoding] == 2} { + set ansidata [fcat -encoding [lindex $encoding 0] $fname] + set ansidata [encoding convertfrom [lindex $encoding 1] $ansidata] + set obj [punk::ansi::class::class_ansi new $ansidata] + } else { + error "encoding list '$encoding' not supported. Use 1 or 2 encodings (first for file read, second as encoding convertfrom)" + } + return $obj + } + proc ansicat {fname args} { + set encnames [encoding names] + set encoding "" + set dimensions "" + set test_mode 0 + foreach a $args { + if {$a eq "test_mode"} { + set test_mode 1 + } elseif {$a in $encnames} { + set encoding $a + } else { + if {[regexp {[0-9]+(?:x|X)[0-9]+} $a]} { + set dimensions $a + } + } + } + if {$encoding eq ""} { + set encoding cp437 + } + + if {$dimensions eq ""} { + set dimensions 80x24 + } + + set ansidata [fcat -encoding $encoding $fname] + set obj [punk::ansi::class::class_ansi new $ansidata] + if {$encoding eq "cp437"} { + set result [$obj rendertest $dimensions] + } else { + set result [$obj render $dimensions] + } + $obj destroy + return $result + } + #utf-8/ascii encoded cp437 + proc ansicat2 {fname {encoding utf-8}} { + set data [fcat -encoding $encoding $fname] + set ansidata [encoding convertfrom cp437 $data] + set obj [punk::ansi::class::class_ansi new $ansidata] + set result [$obj render] + $obj destroy + return $result + } + proc example {} { + #todo - review dependency on punk::repo ? + package require textblock + package require punk::repo + package require punk::console + + set fnames [list belinda.ans bot.ans flower.ans fish.ans] + set base [punk::repo::find_project] + set ansibase [file join $base src/testansi] + if {![file exists $ansibase]} { + puts stderr "Missing testansi folder at $base/src/testansi" + puts stderr "Ensure ansi test files exist: $fnames" + #error "punk::ansi::example Cannot find example files" + } + set missingbase [a+ yellow][textblock::block 80 23 ?][a] + set pics [list] + foreach f $fnames { + if {![file exists $ansibase/$f]} { + set p [overtype::left $missingbase "[a+ red bold]\nMissing file\n$ansibase/$f[a]"] + lappend pics [tcl::dict::create filename $f pic $p status missing] + } else { + set img [join [lines_as_list -line trimline -block trimtail [ansicat $ansibase/$f]] \n] + lappend pics [tcl::dict::create filename $f pic $img status ok] + } + } + + set termsize [punk::console:::get_size] + set margin 4 + set freewidth [expr {[tcl::dict::get $termsize columns]-$margin}] + set per_row [expr {$freewidth / 80}] + + set rowlist [list] + set row [list] + set i 1 + foreach picinfo $pics { + set subtitle "" + if {[tcl::dict::get $picinfo status] ne "ok"} { + set subtitle [tcl::dict::get $picinfo status] + } + set title [tcl::dict::get $picinfo filename] + lappend row [textblock::frame -subtitle $subtitle -title $title [tcl::dict::get $picinfo pic]] + if {$i % $per_row == 0} { + lappend rowlist $row + set row [list] + } elseif {$i == [llength $pics]} { + lappend rowlist $row + } + incr i + } + + set result "" + foreach r $rowlist { + append result [textblock::join_basic -- {*}$r] \n + } + + + return $result + } + #control strings + #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf + # + #A control string is a string of bit combinations which may occur in the data stream as a logical entity for + #control purposes. A control string consists of an opening delimiter, a command string or a character string, + #and a terminating delimiter, the STRING TERMINATOR (ST). + #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. + #A character string is a sequence of any bit combination, except those representing START OF STRING + #(SOS) or STRING TERMINATOR (ST). + #The interpretation of the command string or the character string is not defined by this Standard, but instead + #requires prior agreement between the sender and the recipient of the data. + #The opening delimiters defined in this Standard are + #a) APPLICATION PROGRAM COMMAND (APC) + #b) DEVICE CONTROL STRING (DCS) + #c) OPERATING SYSTEM COMMAND (OSC) + #d) PRIVACY MESSAGE (PM) + #e) START OF STRING (SOS) + # + + #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. + # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ + #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. + #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. + #review - can terminals handle SGR codes within a PM? + #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) + proc controlstring_PM {text} { + return "\x1b^${text}\033\\" + } + proc controlstring_PM8 {text} { + return "\x9e${text}\x9c" + } + proc controlstring_SOS {text} { + return "\x1bX${text}\033\\" + } + proc controlstring_SOS8 {text} { + return "\x98${text}\x9c" + } + proc controlstring_APC {text} { + return "\x1b_${text}\033\\" + } + proc controlstring_APC8 {text} { + return "\x9f${text}\x9c" + } + #there is also the SGR hide code (8) which has intermittent terminal support + #This doesn't change the output length - so support is tricky to detec. (terminal checksum report?) + + #candidate for zig/c implementation? + proc stripansi2 {text} { + + + set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters + join [::punk::ansi::ta::split_at_codes $text] "" + } + + + proc stripansi1 {text} { + + #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW + + variable escape_terminals ;#dict + variable ::punk::ansi::ta::standalone_code_map ;#map to empty string + + set text [convert_g0 $text] + + + set text [tcl::string::map $standalone_code_map $text] + #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm + #\x1b#3 double-height letters top half + #\x1b#4 double-height letters bottom half + #\x1b#5 single-width line + #\x1b#6 double-width line + #\x1b#8 dec test fill screen + + + #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. + + #Theoretically line endings can occur within an ST payload (review e.g title?) + #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) + + set inputlist [split $text ""] + set outputlist [list] + + set in_escapesequence 0 + #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) + + set i 0 + foreach u $inputlist { + set v [lindex $inputlist $i+1] + set uv ${u}${v} + if {$in_escapesequence eq "2b"} { + #2nd byte - done. + set in_escapesequence 0 + } elseif {$in_escapesequence != 0} { + set endseq [tcl::dict::get $escape_terminals $in_escapesequence] + if {$u in $endseq} { + set in_escapesequence 0 + } elseif {$uv in $endseq} { + set in_escapesequence 2b ;#flag next byte as last in sequence + } + } else { + #handle both 7-bit and 8-bit CSI and OSC + if {[regexp {^(?:\033\[|\u009b)} $uv]} { + set in_escapesequence CSI + } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { + set in_escapesequence OSC + } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { + set in_escapesequence DCS + } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { + #SOS,PM,APC - all terminated with ST + set in_escapesequence MISC + } else { + lappend outputlist $u + } + } + incr i + } + return [join $outputlist ""] + } + + #review - what happens when no terminator? + #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) + # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set + # esc) ?? + proc convert_g0 {text} { + variable map_special_graphics + + #using not \033 inside to stop greediness - review how does it compare to ".*?" + #variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + #set re {\033\(0[^\033]*\033\(B} + #set re {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + #set re2 {\033\(0(.*)\033\(B} ;#capturing + + #puts --$g-- + #box sample + #lqk + #x x + #mqj + #m = boxd_lur + + #don't call detect_g0 in here. Leave for caller. e.g ansistrip uses detect_g0 to decide whether to call this. + + set re_g0_open_or_close {\x1b\(0|\x1b\(B} + set parts [::punk::ansi::ta::_perlish_split $re_g0_open_or_close $text] + set out {} + set g0_on 0 + foreach {other g} $parts { + if {$g0_on} { + #split for non graphics-set codes + set othersplits [punk::ansi::ta::split_codes $other] ;#we don't need single codes here + foreach {inner_plaintext inner_codes} $othersplits { + lappend out [tcl::string::map $map_special_graphics $inner_plaintext] $inner_codes + #Simplifying assumption: no mapping required on any inner_codes - ST codes, titlesets etc don't require/use g0 content + } + } else { + lappend out $other ;#may be a mix of plaintext and other ansi codes - put it all through. + } + #trust our splitting regex has done the work to leave us with only \x1b\(0 or \x1b(B - test last char rather than use punk::ansi::codetype::is_gx_open/is_gx_close + switch -- [tcl::string::index $g end] { + 0 { + set g0_on 1 + } + B { + set g0_on 0 + } + } + } + return [join $out ""] + } + proc convert_g0_wrong {text} { + #Attempting to split on a group is wrong - because there could be other ansi codes while inside a g0 section + #That will either stop us matching - so no conversion - or risk converting parts of the ansi codes + #using not \033 inside to stop greediness - review how does it compare to ".*?" + #variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + set re {\033\(0[^\033]*\033\(B} + set re2 {\033\(0(.*)\033\(B} ;#capturing + + #box sample + #lqk + #x x + #mqj + #m = boxd_lur + #set map [list l \u250f k \u2513] ;#heavy + set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines + #todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html + + set parts [::punk::ansi::ta::_perlish_split $re $text] + set out "" + foreach {pt g} $parts { + append out $pt + if {$g ne ""} { + #puts --$g-- + regexp $re2 $g _match contents + append out [tcl::string::map $map $contents] + } + } + return $out + } + + #Wrap text in ansi codes to switch to DEC alternate graphics character set. + proc g0 {text} { + return \x1b(0$text\x1b(B + } + proc ansistrip_gx {text} { + #e.g "\033(0" - select VT100 graphics for character set G0 + #e.g "\033(B" - reset + #e.g "\033)0" - select VT100 graphics for character set G1 + #e.g "\033)X" - where X is any char other than 0 to reset ?? + + #return [convert_g0 $text] + return [tcl::string::map [list "\x1b(0" "" \x1b(B" "" "\x1b)0" "" "\x1b)X" ""] $text] + } + proc stripansi_gx {text} { + return [tcl::string::map [list "\x1b(0" "" \x1b(B" "" "\x1b)0" "" "\x1b)X" ""] $text] + } + + + #CSI m = SGR (Select Graphic Rendition) +#leave map unindented - used both as a dict and for direct display + variable SGR_setting_map { +reset 0 bold 1 dim 2 italic 3 noitalic 23 +underline 4 doubleunderline 21 nounderline 24 blink 5 fastblink 6 noblink 25 +reverse 7 noreverse 27 hide 8 nohide 28 strike 9 nostrike 29 +normal 22 defaultfg 39 defaultbg 49 overline 53 nooverline 55 +frame 51 framecircle 52 noframe 54 underlinedefault 59 + } + #unprefixed colours are (close to) the ansi-specified colour names (lower-cased and whitespace collapsed, with capitalisation of 1st letter given fg/bg meaning here) +#leave map unindented - used both as a dict and for direct display + variable SGR_colour_map { +black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 +Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 +brightblack 90 brightred 91 brightgreen 92 brightyellow 93 brightblue 94 brightpurple 95 brightcyan 96 brightwhite 97 +Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblue 104 Brightpurple 105 Brightcyan 106 Brightwhite 107 + } + variable SGR_map ;#public - part of interface - review + set SGR_map [tcl::dict::merge $SGR_colour_map $SGR_setting_map] + + #we use prefixes e.g web-white and/or x11-white + #Only a leading capital letter will indicate the colour target is background vs lowercase for foreground + #In the map key-lookup context the colour names will be canonically lower case + #We should be case insensitive in the non-prefix part ie after determining fg/bg target from first letter of the prefix + #e.g Web-Lime or Web-lime are ok and are targeting background + #foreground target examples: web-Lime web-LIME web-DarkSalmon web-Darksalmon + + #specified in decimal - but we should also accept hex format directly in a+ function e.g #00FFFF for aqua + variable WEB_colour_map + #use the totitle format as the canonical lookup key + #don't use leading zeros - keep compatible with earlier tcl and avoid octal issue + # -- --- --- + #css 1-2.0 HTML 3.2-4 Basic colours eg web-silver for fg Web-silver for bg + # + variable WEB_colour_map_basic + tcl::dict::set WEB_colour_map_basic white 255-255-255 ;# #FFFFFF + tcl::dict::set WEB_colour_map_basic silver 192-192-192 ;# #C0C0C0 + tcl::dict::set WEB_colour_map_basic gray 128-128-128 ;# #808080 + tcl::dict::set WEB_colour_map_basic black 0-0-0 ;# #000000 + tcl::dict::set WEB_colour_map_basic red 255-0-0 ;# #FF0000 + tcl::dict::set WEB_colour_map_basic maroon 128-0-0 ;# #800000 + tcl::dict::set WEB_colour_map_basic yellow 255-255-0 ;# #FFFF00 + tcl::dict::set WEB_colour_map_basic olive 128-128-0 ;# #808000 + tcl::dict::set WEB_colour_map_basic lime 0-255-0 ;# #00FF00 + tcl::dict::set WEB_colour_map_basic green 0-128-0 ;# #008000 + tcl::dict::set WEB_colour_map_basic aqua 0-255-255 ;# #00FFFF + tcl::dict::set WEB_colour_map_basic teal 0-128-128 ;# #008080 + tcl::dict::set WEB_colour_map_basic blue 0-0-255 ;# #0000FF + tcl::dict::set WEB_colour_map_basic navy 0-0-128 ;# #000080 + tcl::dict::set WEB_colour_map_basic fuchsia 255-0-255 ;# #FF00FF + tcl::dict::set WEB_colour_map_basic purple 128-0-128 ;# #800080 + # -- --- --- + #Pink colours + variable WEB_colour_map_pink + tcl::dict::set WEB_colour_map_pink mediumvioletred 199-21-133 ;# #C71585 + tcl::dict::set WEB_colour_map_pink deeppink 255-20-147 ;# #FF1493 + tcl::dict::set WEB_colour_map_pink palevioletred 219-112-147 ;# #DB7093 + tcl::dict::set WEB_colour_map_pink hotpink 255-105-180 ;# #FF69B4 + tcl::dict::set WEB_colour_map_pink lightpink 255-182-193 ;# #FFB6C1 + tcl::dict::set WEB_colour_map_pink pink 255-192-203 ;# #FFCOCB + # -- --- --- + #Red colours + variable WEB_colour_map_red + tcl::dict::set WEB_colour_map_red darkred 139-0-0 ;# #8B0000 + tcl::dict::set WEB_colour_map_red red 255-0-0 ;# #FF0000 + tcl::dict::set WEB_colour_map_red firebrick 178-34-34 ;# #B22222 + tcl::dict::set WEB_colour_map_red crimson 220-20-60 ;# #DC143C + tcl::dict::set WEB_colour_map_red indianred 205-92-92 ;# #CD5C5C + tcl::dict::set WEB_colour_map_red lightcoral 240-128-128 ;# #F08080 + tcl::dict::set WEB_colour_map_red salmon 250-128-114 ;# #FA8072 + tcl::dict::set WEB_colour_map_red darksalmon 233-150-122 ;# #E9967A + tcl::dict::set WEB_colour_map_red lightsalmon 255-160-122 ;# #FFA07A + # -- --- --- + #Orange colours + variable WEB_colour_map_orange + tcl::dict::set WEB_colour_map_orange orangered 255-69-0 ;# #FF4500 + tcl::dict::set WEB_colour_map_orange tomato 255-99-71 ;# #FF6347 + tcl::dict::set WEB_colour_map_orange darkorange 255-140-0 ;# #FF8C00 + tcl::dict::set WEB_colour_map_orange coral 255-127-80 ;# #FF7F50 + tcl::dict::set WEB_colour_map_orange orange 255-165-0 ;# #FFA500 + # -- --- --- + #Yellow colours + variable WEB_colour_map_yellow + tcl::dict::set WEB_colour_map_yellow darkkhaki 189-183-107 ;# #BDB76B + tcl::dict::set WEB_colour_map_yellow gold 255-215-0 ;# #FFD700 + tcl::dict::set WEB_colour_map_yellow khaki 240-230-140 ;# #F0E68C + tcl::dict::set WEB_colour_map_yellow peachpuff 255-218-185 ;# #FFDAB9 + tcl::dict::set WEB_colour_map_yellow yellow 255-255-0 ;# #FFFF00 + tcl::dict::set WEB_colour_map_yellow palegoldenrod 238-232-170 ;# #EEE8AA + tcl::dict::set WEB_colour_map_yellow moccasin 255-228-181 ;# #FFE4B5 + tcl::dict::set WEB_colour_map_yellow papayawhip 255-239-213 ;# #FFEFD5 + tcl::dict::set WEB_colour_map_yellow lightgoldenrodyeallow 250-250-210 ;# #FAFAD2 + tcl::dict::set WEB_colour_map_yellow lemonchiffon 255-250-205 ;# #FFFACD + tcl::dict::set WEB_colour_map_yellow lightyellow 255-255-224 ;# #FFFFE0 + # -- --- --- + #Brown colours + #maroon as above + variable WEB_colour_map_brown + tcl::dict::set WEB_colour_map_brown brown 165-42-42 ;# #A52A2A + tcl::dict::set WEB_colour_map_brown saddlebrown 139-69-19 ;# #8B4513 + tcl::dict::set WEB_colour_map_brown sienna 160-82-45 ;# #A0522D + tcl::dict::set WEB_colour_map_brown chocolate 210-105-30 ;# #D2691E + tcl::dict::set WEB_colour_map_brown darkgoldenrod 184-134-11 ;# #B8860B + tcl::dict::set WEB_colour_map_brown peru 205-133-63 ;# #CD853F + tcl::dict::set WEB_colour_map_brown rosybrown 188-143-143 ;# #BC8F8F + tcl::dict::set WEB_colour_map_brown goldenrod 218-165-32 ;# #DAA520 + tcl::dict::set WEB_colour_map_brown sandybrown 244-164-96 ;# #F4A460 + tcl::dict::set WEB_colour_map_brown tan 210-180-140 ;# #D2B48C + tcl::dict::set WEB_colour_map_brown burlywood 222-184-135 ;# #DEB887 + tcl::dict::set WEB_colour_map_brown wheat 245-222-179 ;# #F5DEB3 + tcl::dict::set WEB_colour_map_brown navajowhite 255-222-173 ;# #FFDEAD + tcl::dict::set WEB_colour_map_brown bisque 255-228-196 ;# #FFEfC4 + tcl::dict::set WEB_colour_map_brown blanchedalmond 255-228-196 ;# #FFEfC4 + tcl::dict::set WEB_colour_map_brown cornsilk 255-248-220 ;# #FFF8DC + # -- --- --- + #Purple, violet, and magenta colours + variable WEB_colour_map_purple + tcl::dict::set WEB_colour_map_purple indigo 75-0-130 ;# #4B0082 + tcl::dict::set WEB_colour_map_purple purple 128-0-128 ;# #800080 + tcl::dict::set WEB_colour_map_purple darkmagenta 139-0-139 ;# #8B008B + tcl::dict::set WEB_colour_map_purple darkviolet 148-0-211 ;# #9400D3 + tcl::dict::set WEB_colour_map_purple darkslateblue 72-61-139 ;# #9400D3 + tcl::dict::set WEB_colour_map_purple blueviolet 138-43-226 ;# #8A2BE2 + tcl::dict::set WEB_colour_map_purple darkorchid 153-50-204 ;# #9932CC + tcl::dict::set WEB_colour_map_purple fuchsia 255-0-255 ;# #FF00FF + tcl::dict::set WEB_colour_map_purple magenta 255-0-255 ;# #FF00FF - same as fuchsia + tcl::dict::set WEB_colour_map_purple slateblue 106-90-205 ;# #6A5ACD + tcl::dict::set WEB_colour_map_purple mediumslateblue 123-104-238 ;# #7B68EE + tcl::dict::set WEB_colour_map_purple mediumorchid 186-85-211 ;# #BA5503 + tcl::dict::set WEB_colour_map_purple mediumpurple 147-112-219 ;# #9370DB + tcl::dict::set WEB_colour_map_purple orchid 218-112-214 ;# #DA70D6 + tcl::dict::set WEB_colour_map_purple violet 238-130-238 ;# #EE82EE + tcl::dict::set WEB_colour_map_purple plum 221-160-221 ;# #DDA0DD + tcl::dict::set WEB_colour_map_purple thistle 216-191-216 ;# #D88FD8 + tcl::dict::set WEB_colour_map_purple lavender 230-230-250 ;# #E6E6FA + # -- --- --- + #Blue colours + variable WEB_colour_map_blue + tcl::dict::set WEB_colour_map_blue midnightblue 25-25-112 ;# #191970 + tcl::dict::set WEB_colour_map_blue navy 0-0-128 ;# #000080 + tcl::dict::set WEB_colour_map_blue darkblue 0-0-139 ;# #00008B + tcl::dict::set WEB_colour_map_blue mediumblue 0-0-205 ;# #0000CD + tcl::dict::set WEB_colour_map_blue blue 0-0-255 ;# #0000FF + tcl::dict::set WEB_colour_map_blue royalblue 65-105-225 ;# #4169E1 + tcl::dict::set WEB_colour_map_blue steelblue 70-130-180 ;# #4682B4 + tcl::dict::set WEB_colour_map_blue dodgerblue 30-144-255 ;# #1E90FF + tcl::dict::set WEB_colour_map_blue deepskyblue 0-191-255 ;# #00BFFF + tcl::dict::set WEB_colour_map_blue cornflowerblue 100-149-237 ;# #6495ED + tcl::dict::set WEB_colour_map_blue skyblue 135-206-235 ;# #87CEEB + tcl::dict::set WEB_colour_map_blue lightskyblue 135-206-250 ;# #87CEFA + tcl::dict::set WEB_colour_map_blue lightsteelblue 176-196-222 ;# #B0C4DE + tcl::dict::set WEB_colour_map_blue lightblue 173-216-230 ;# #ADD8E6 + tcl::dict::set WEB_colour_map_blue powderblue 176-224-230 ;# #B0E0E6 + # -- --- --- + #Cyan colours + #teal as above + variable WEB_colour_map_cyan + tcl::dict::set WEB_colour_map_cyan darkcyan 0-139-139 ;# #008B8B + tcl::dict::set WEB_colour_map_cyan lightseagreen 32-178-170 ;# #20B2AA + tcl::dict::set WEB_colour_map_cyan cadetblue 95-158-160 ;# #5F9EA0 + tcl::dict::set WEB_colour_map_cyan darkturquoise 0-206-209 ;# #00CED1 + tcl::dict::set WEB_colour_map_cyan mediumturquoise 72-209-204 ;# #48D1CC + tcl::dict::set WEB_colour_map_cyan turquoise 64-224-208 ;# #40E0D0 + tcl::dict::set WEB_colour_map_cyan aqua 0-255-255 ;# #00FFFF + tcl::dict::set WEB_colour_map_cyan cyan 0-255-255 ;# #00FFFF - same as aqua + tcl::dict::set WEB_colour_map_cyan aquamarine 127-255-212 ;# #7FFFD4 + tcl::dict::set WEB_colour_map_cyan paleturquoise 175-238-238 ;# #AFEEEE + tcl::dict::set WEB_colour_map_cyan lightcyan 224-255-255 ;# #E0FFFF + # -- --- --- + #Green colours + variable WEB_colour_map_green + tcl::dict::set WEB_colour_map_green darkgreen 0-100-0 ;# #006400 + tcl::dict::set WEB_colour_map_green green 0-128-0 ;# #008000 + tcl::dict::set WEB_colour_map_green darkolivegreen 85-107-47 ;# #55682F + tcl::dict::set WEB_colour_map_green forestgreen 34-139-34 ;# #228B22 + tcl::dict::set WEB_colour_map_green seagreen 46-139-87 ;# #2E8B57 + tcl::dict::set WEB_colour_map_green olive 128-128-0 ;# #808000 + tcl::dict::set WEB_colour_map_green olivedrab 107-142-35 ;# #6B8E23 + tcl::dict::set WEB_colour_map_green mediumseagreen 60-179-113 ;# #3CB371 + tcl::dict::set WEB_colour_map_green limegreen 50-205-50 ;# #32CD32 + tcl::dict::set WEB_colour_map_green lime 0-255-0 ;# #00FF00 + tcl::dict::set WEB_colour_map_green springgreen 0-255-127 ;# #00FF7F + tcl::dict::set WEB_colour_map_green mediumspringgreen 0-250-154 ;# #00FA9A + tcl::dict::set WEB_colour_map_green darkseagreen 143-188-143 ;# #8FBC8F + tcl::dict::set WEB_colour_map_green mediumaquamarine 102-205-170 ;# #66CDAA + tcl::dict::set WEB_colour_map_green yellowgreen 154-205-50 ;# #9ACD32 + tcl::dict::set WEB_colour_map_green lawngreen 124-252-0 ;# #7CFC00 + tcl::dict::set WEB_colour_map_green chartreuse 127-255-0 ;# #7FFF00 + tcl::dict::set WEB_colour_map_green lightgreen 144-238-144 ;# #90EE90 + tcl::dict::set WEB_colour_map_green greenyellow 173-255-47 ;# #ADFF2F + tcl::dict::set WEB_colour_map_green palegreen 152-251-152 ;# #98FB98 + # -- --- --- + #White colours + variable WEB_colour_map_white + tcl::dict::set WEB_colour_map_white mistyrose 255-228-225 ;# #FFE4E1 + tcl::dict::set WEB_colour_map_white antiquewhite 250-235-215 ;# #FAEBD7 + tcl::dict::set WEB_colour_map_white linen 250-240-230 ;# #FAF0E6 + tcl::dict::set WEB_colour_map_white beige 245-245-220 ;# #F5F5DC + tcl::dict::set WEB_colour_map_white whitesmoke 245-245-245 ;# #F5F5F5 + tcl::dict::set WEB_colour_map_white lavenderblush 255-240-245 ;# #FFF0F5 + tcl::dict::set WEB_colour_map_white oldlace 253-245-230 ;# #FDF5E6 + tcl::dict::set WEB_colour_map_white aliceblue 240-248-255 ;# #F0F8FF + tcl::dict::set WEB_colour_map_white seashell 255-245-238 ;# #FFF5EE + tcl::dict::set WEB_colour_map_white ghostwhite 248-248-255 ;# #F8F8FF + tcl::dict::set WEB_colour_map_white honeydew 240-255-240 ;# #F0FFF0 + tcl::dict::set WEB_colour_map_white floralwhite 255-250-240 ;# #FFFAF0 + tcl::dict::set WEB_colour_map_white azure 240-255-255 ;# #F0FFFF + tcl::dict::set WEB_colour_map_white mintcream 245-255-250 ;# #F5FFFA + tcl::dict::set WEB_colour_map_white snow 255-250-250 ;# #FFFAFA + tcl::dict::set WEB_colour_map_white ivory 255-255-240 ;# #FFFFF0 + tcl::dict::set WEB_colour_map_white white 255-255-255 ;# #FFFFFF + # -- --- --- + #Gray and black colours + variable WEB_colour_map_gray + tcl::dict::set WEB_colour_map_gray black 0-0-0 ;# #000000 + tcl::dict::set WEB_colour_map_gray darkslategray 47-79-79 ;# #2F4F4F + tcl::dict::set WEB_colour_map_gray dimgray 105-105-105 ;# #696969 + tcl::dict::set WEB_colour_map_gray slategray 112-128-144 ;# #708090 + tcl::dict::set WEB_colour_map_gray gray 128-128-128 ;# #808080 + tcl::dict::set WEB_colour_map_gray lightslategray 119-136-153 ;# #778899 + tcl::dict::set WEB_colour_map_gray darkgray 169-169-169 ;# #A9A9A9 + tcl::dict::set WEB_colour_map_gray silver 192-192-192 ;# #C0C0C0 + tcl::dict::set WEB_colour_map_gray lightgray 211-211-211 ;# #D3D3D3 + tcl::dict::set WEB_colour_map_gray gainsboro 220-220-220 ;# #DCDCDC + + set WEB_colour_map [tcl::dict::merge\ + $WEB_colour_map_basic\ + $WEB_colour_map_pink\ + $WEB_colour_map_red\ + $WEB_colour_map_orange\ + $WEB_colour_map_yellow\ + $WEB_colour_map_brown\ + $WEB_colour_map_purple\ + $WEB_colour_map_blue\ + $WEB_colour_map_cyan\ + $WEB_colour_map_green\ + $WEB_colour_map_white\ + $WEB_colour_map_gray\ + ] + + #we should be able to use WEB_colour_map as a base and override only the conflicts for X11 colours ? Review - check if this is true + variable X11_colour_map_diff ;#maintain the difference as a separate dict so we can display in a? x11 + tcl::dict::set X11_colour_map_diff gray 190-190-190 ;# #BEBEBE + tcl::dict::set X11_colour_map_diff green 0-255-0 ;# #00FF00 + tcl::dict::set X11_colour_map_diff maroon 176-48-96 ;# #B03060 + tcl::dict::set X11_colour_map_diff purple 160-32-240 ;# #A020F0 + + variable X11_colour_map + set X11_colour_map [tcl::dict::merge $WEB_colour_map $X11_colour_map_diff] + + + #Xterm colour names (256 colours) + #lists on web have duplicate names + #these have been renamed here in a systematic way: + #They are suffixed with a dash and a letter e.g second deepskyblue4 -> deepskyblue4-b, third deepskyblue4 -> deepskyblue4-c + #presumably the xterm colour names are not widely used or are used for reverse lookup from rgb to get an approximate name in the case of dupes? + #Review! + #keep duplicate names in the list and map them when building the dict. + + #This is an in depth analysis of the xterm colour set which gives names(*) to all of the 256 colours and describes possible indexing by Hue,Luminance,Saturation + #https://www.wowsignal.io/articles/xterm256 + # *The names are wildly-imaginative, often unintuitively so, and multiple (5?) given for each colour - so they are unlikely to be of practical use or any sort of standard. + #e.g who is to know that 'Rabbit Paws', 'Forbidden Thrill' and 'Tarsier' refer to a particular shade of pinky-red? (code 95) + #Perhaps it's an indication that colour naming once we get to 256 colours or more is a fool's errand anyway. + #The xterm names are boringly unimaginative - and also have some oddities such as: + # DarkSlateGray1 which looks much more like cyan.. + # The greyxx names are spelt with an e - but the darkslategrayX variants use an a. Perhaps that's because they are more cyan than grey and the a is a hint? + # there is no gold or gold2 - but there is gold1 and gold3 + #but in general the names bear some resemblance to the colours and are at least somewhat intuitive. + + set xterm_names [list\ + black\ + maroon\ + green\ + olive\ + navy\ + purple\ + teal\ + silver\ + grey\ + red\ + lime\ + yellow\ + blue\ + fuchsia\ + aqua\ + white\ + grey0\ + navyblue\ + darkblue\ + blue3\ + blue3\ + blue1\ + darkgreen\ + deepskyblue4\ + deepskyblue4\ + deepskyblue4\ + dodgerblue3\ + dodgerblue2\ + green4\ + springgreen4\ + turquoise4\ + deepskyblue3\ + deepskyblue3\ + dodgerblue1\ + green3\ + springgreen3\ + darkcyan\ + lightseagreen\ + deepskyblue2\ + deepskyblue1\ + green3\ + springgreen3\ + springgreen2\ + cyan3\ + darkturquoise\ + turquoise2\ + green1\ + springgreen2\ + springgreen1\ + mediumspringgreen\ + cyan2\ + cyan1\ + darkred\ + deeppink4\ + purple4\ + purple4\ + purple3\ + blueviolet\ + orange4\ + grey37\ + mediumpurple4\ + slateblue3\ + slateblue3\ + royalblue1\ + chartreuse4\ + darkseagreen4\ + paleturquoise4\ + steelblue\ + steelblue3\ + cornflowerblue\ + chartreuse3\ + darkseagreen4\ + cadetblue\ + cadetblue\ + skyblue3\ + steelblue1\ + chartreuse3\ + palegreen3\ + seagreen3\ + aquamarine3\ + mediumturquoise\ + steelblue1\ + chartreuse2\ + seagreen2\ + seagreen1\ + seagreen1\ + aquamarine1\ + darkslategray2\ + darkred\ + deeppink4\ + darkmagenta\ + darkmagenta\ + darkviolet\ + purple\ + orange4\ + lightpink4\ + plum4\ + mediumpurple3\ + mediumpurple3\ + slateblue1\ + yellow4\ + wheat4\ + grey53\ + lightslategrey\ + mediumpurple\ + lightslateblue\ + yellow4\ + darkolivegreen3\ + darkseagreen\ + lightskyblue3\ + lightskyblue3\ + skyblue2\ + chartreuse2\ + darkolivegreen3\ + palegreen3\ + darkseagreen3\ + darkslategray3\ + skyblue1\ + chartreuse1\ + lightgreen\ + lightgreen\ + palegreen1\ + aquamarine1\ + darkslategray1\ + red3\ + deeppink4\ + mediumvioletred\ + magenta3\ + darkviolet\ + purple\ + darkorange3\ + indianred\ + hotpink3\ + mediumorchid3\ + mediumorchid\ + mediumpurple2\ + darkgoldenrod\ + lightsalmon3\ + rosybrown\ + grey63\ + mediumpurple2\ + mediumpurple1\ + gold3\ + darkkhaki\ + navajowhite\ + grey69\ + lightsteelblue3\ + lightsteelblue\ + yellow3\ + darkolivegreen3\ + darkseagreen3\ + darkseagreen2\ + lightcyan3\ + lightskyblue1\ + greenyellow\ + darkolivegreen2\ + palegreen1\ + darkseagreen2\ + darkseagreen1\ + paleturquoise1\ + red3\ + deeppink3\ + deeppink3\ + magenta3\ + magenta3\ + magenta2\ + darkorange3\ + indianred\ + hotpink3\ + hotpink2\ + orchid\ + mediumorchid1\ + orange3\ + lightsalmon3\ + lightpink3\ + pink3\ + plum3\ + violet\ + gold3\ + lightgoldenrod3\ + tan\ + mistyrose3\ + thistle3\ + plum2\ + yellow3\ + khaki3\ + lightgoldenrod2\ + lightyellow3\ + grey84\ + lightsteelblue1\ + yellow2\ + darkolivegreen1\ + darkolivegreen1\ + darkseagreen1\ + honeydew2\ + lightcyan1\ + red1\ + deeppink2\ + deeppink1\ + deeppink1\ + magenta2\ + magenta1\ + orangered1\ + indianred1\ + indianred1\ + hotpink\ + hotpink\ + mediumorchid1\ + darkorange\ + salmon1\ + lightcoral\ + palevioletred1\ + orchid2\ + orchid1\ + orange1\ + sandybrown\ + lightsalmon1\ + lightpink1\ + pink1\ + plum1\ + gold1\ + lightgoldenrod2\ + lightgoldenrod2\ + navajowhite1\ + mistyrose1\ + thistle1\ + yellow1\ + lightgoldenrod1\ + khaki1\ + wheat1\ + cornsilk1\ + grey100\ + grey3\ + grey7\ + grey11\ + grey11\ + grey15\ + grey19\ + grey23\ + grey27\ + grey30\ + grey35\ + grey39\ + grey42\ + grey46\ + grey50\ + grey54\ + grey58\ + grey62\ + grey66\ + grey70\ + grey74\ + grey78\ + grey82\ + grey85\ + grey89\ + grey93\ + ] + variable TERM_colour_map + set TERM_colour_map [tcl::dict::create] + variable TERM_colour_map_reverse + set TERM_colour_map_reverse [tcl::dict::create] + set cidx 0 + foreach cname $xterm_names { + if {![tcl::dict::exists $TERM_colour_map $cname]} { + tcl::dict::set TERM_colour_map $cname $cidx + tcl::dict::set TERM_colour_map_reverse $cidx $cname + } else { + set did_rename 0 + #start suffixes at '-b'. The base name could be considered the '-a' version - but we don't create it. + foreach {suffix} {b c} { + if {![tcl::dict::exists $TERM_colour_map $cname-$suffix]} { + tcl::dict::set TERM_colour_map $cname-$suffix $cidx + tcl::dict::set TERM_colour_map_reverse $cidx $cname-$suffix + set did_rename 1 + break + } + } + if {!$did_rename} { + error "Not enough suffixes for duplicate names in xterm colour list. Add more suffixes or review list" + } + } + incr cidx + } + + + + + #colour_hex2ansidec + #conversion of hex to format directly pluggable to ansi rgb format (colon separated e.g for foreground we need "38;2;$r;$g;$b" so we return $r;$g;$b) + #we want to support arbitrary rgb values specified in hex - so a table of 16M+ is probably not a great idea + #hex zero-padded - canonically upper case but mixed or lower accepted + #dict for {k v} $WEB_colour_map { + # set dectriple [split $v -] + # set webhex [::join [format %02X%02X%02X {*}$dectriple] ;# e.g 808080, FFFFFF, 000000 + # tcl::dict::set HEX_colour_map $webhex [join $dectriple {;}] + #} + proc colour_hex2ansidec {hex6} { + return [join [::scan $hex6 %2X%2X%2X] {;}] + } + + #convert between hex and decimal as used in the a+ function + # eg dec-dec-dec <-> #HHHHHH + #allow hex to be specified with or without leading # + proc colour_hex2dec {hex6} { + set hex6 [tcl::string::map {# ""} $hex6] + return [join [::scan $hex6 %2X%2X%2X] {-}] + } + proc colour_dec2hex {decimalcolourstring} { + set dec [tcl::string::map [list {;} - , -] $decimalcolourstring] + set declist [split $dec -] + set hex #[format %02X%02X%02X {*}$declist] + } + + proc get_sgr_map {} { + variable SGR_map + return $SGR_map + } + + proc colourmap1 {args} { + set opts {-bg Web-white -forcecolour 0} + foreach {k v} $args { + switch -- $k { + -bg - -forcecolour { + tcl::dict::set opts $k $v + } + default { + error "colourmap1 unrecognised option $k. Known-options: [tcl::dict::keys $opts] + } + } + } + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } else { + set fc "" + } + set bgname [tcl::dict::get $opts -bg] + + package require textblock + set bg [textblock::block 33 3 "[a+ {*}$fc $bgname] [a]"] + set colourmap "" + set RST [a] + for {set i 0} {$i <= 7} {incr i} { + #append colourmap "_[a+ white bold 48\;5\;$i] $i [a]" + append colourmap "_[a+ {*}$fc white bold Term-$i] $i $RST" + } + set map1 [overtype::left -transparent _ $bg "\n$colourmap"] + return $map1 + } + proc colourmap2 {args} { + set defaults {-forcecolour 0 -bg Web-white} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set bgname [tcl::dict::get $opts -bg] + + package require textblock + set bg [textblock::block 39 3 "[a+ {*}$fc $bgname] [a]"] + set colourmap "" + set RST [a] + for {set i 8} {$i <= 15} {incr i} { + if {$i == 8} { + set fg "bold white" + } else { + set fg "black normal" ;#black normal is often blacker than black bold - which can display as a grey + } + append colourmap "_[a+ {*}$fc {*}$fg 48\;5\;$i] $i $RST" + } + set map2 [overtype::left -transparent _ $bg "\n$colourmap"] + return $map2 + } + proc colourtable_216 {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + package require textblock + set clist [list] + set fg "black" + for {set i 16} {$i <=231} {incr i} { + if {$i % 18 == 16} { + if {$fg eq "black"} { + set fg "bold white" + } else { + set fg "black" + } + } + lappend clist "[a+ {*}$fc {*}$fg Term$i][format %3s $i]" + } + + set t [textblock::list_as_table -columns 36 -return tableobject $clist] + $t configure -show_hseps 0 + #return [$t print] + return $t + } + + #1st 16 colours of 256 - match SGR colours + proc colourblock_16 {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set out "" + set fg "bold white" + for {set i 0} {$i <= 15} {incr i} { + #8 is black - so start black fg at 9 + if {$i > 8} { + set fg "web-black" + } + append out "[a+ {*}$fc {*}$fg Term$i][format %3s $i] " + } + return $out[a] + } + proc colourtable_16_names {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + variable TERM_colour_map_reverse + set rows [list] + set row [list] + set fg "web-white" + set t [textblock::class::table new] + $t configure -show_seps 0 -show_edge 0 + for {set i 0} {$i <=15} {incr i} { + set cname [tcl::dict::get $TERM_colour_map_reverse $i] ;#use term-$i etc instead of term-$name? + if {[llength $row]== 8} { + lappend rows $row + set row [list] + } + if {$i == 8} { + set fg "web-white" + } elseif {$i > 6} { + set fg "web-black" + } + #lappend row "[a+ {*}$fc {*}$fg Term-$cname][format %3s $i] $cname " + lappend row "[a+ {*}$fc {*}$fg Term-$i][format %3s $i] $cname " + } + lappend rows $row + foreach r $rows { + $t add_row $r + } + append out [$t print] + $t destroy + append out [a] + return [tcl::string::trimleft $out \n] + + } + #216 colours of 256 + proc colourblock_216 {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set out "" + set fg "web-black" + for {set i 16} {$i <=231} {incr i} { + if {$i % 18 == 16} { + if {$fg eq "web-black"} { + set fg "web-white" + } else { + set fg "web-black" + } + set br "\n" + } else { + set br "" + } + append out "$br[a+ {*}$fc {*}$fg Term$i][format %3s $i] " + } + append out [a] + return [tcl::string::trimleft $out \n] + } + + #x6 is reasonable from a width (124 screen cols) and colour viewing perspective + proc colourtable_216_names {args} { + set defaults {-forcecolour 0 -columns 6} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set cols [tcl::dict::get $opts -columns] + + set out "" + #use the reverse lookup dict - the original xterm_names list has duplicates - we want the disambiguated (potentially suffixed) names + variable TERM_colour_map_reverse + set rows [list] + set row [list] + set fg "web-black" + set t [textblock::class::table new] + $t configure -show_seps 0 -show_edge 0 + for {set i 16} {$i <=231} {incr i} { + set cname [tcl::dict::get $TERM_colour_map_reverse $i] ;#use term-cname etc instead of term$i - may as well let a+ cache the call by name as the preferred? option + if {[llength $row]== $cols} { + lappend rows $row + set row [list] + } + if {$i % 18 == 16} { + if {$fg eq "web-black"} { + set fg "web-white" + } else { + set fg "web-black" + } + } + lappend row "[a+ {*}$fc {*}$fg Term-$cname][format %3s $i] $cname " + } + lappend rows $row + foreach r $rows { + $t add_row $r + } + append out [$t print] + $t destroy + append out [a] + return [tcl::string::trimleft $out \n] + } + proc colourtable_term_pastel {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set out "" + set rows [list] + #see https://www.hackitu.de/termcolor256/ + lappend rows {59 95 131 167 174 181 188} + lappend rows {59 95 131 173 180 187 188} + lappend rows {59 95 137 179 186 187 188} + lappend rows {59 101 143 185 186 187 188} + lappend rows {59 65 107 149 186 187 188} + lappend rows {59 65 71 113 150 187 188} + lappend rows {59 65 71 77 114 151 188} + lappend rows {59 65 71 78 115 152 188} + lappend rows {59 65 72 79 116 152 188} + lappend rows {59 66 73 80 116 152 188} + lappend rows {59 60 67 74 116 152 188} + lappend rows {59 60 61 68 110 152 188} + lappend rows {59 60 61 62 104 146 188} + lappend rows {59 60 61 98 140 182 188} + lappend rows {59 60 97 134 176 182 188} + lappend rows {59 96 133 170 176 182 188} + lappend rows {59 95 132 169 176 182 188} + lappend rows {59 95 131 168 175 182 188} + + set t [textblock::class::table new] + $t configure -show_seps 0 -show_edge 0 + set fg "web-black" + foreach r $rows { + set rowcells [list] + foreach cnum $r { + lappend rowcells "[a+ {*}$fc $fg Term-$cnum][format %3s $cnum] " + } + $t add_row $rowcells + } + append out [$t print] + $t destroy + set pastel8 [list 102 138 144 108 109 103 139 145] + set p8 "" + foreach cnum $pastel8 { + append p8 "[a+ {*}$fc $fg Term-$cnum][format %3s $cnum] " + } + append p8 [a]\n + append out \n $p8 + + return $out + } + proc colourtable_term_rainbow {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set out "" + set rows [list] + set fgwhite [list 16 52 88 124 160 22 17 18 19 20 21 57 56 93 55 92 54 91 53 90 89 126 88 125 124 160] + #see https://www.hackitu.de/termcolor256/ + lappend rows {16 52 88 124 160 196 203 210 217 224 231} + lappend rows {16 52 88 124 160 202 209 216 223 230 231} + lappend rows {16 52 88 124 166 208 215 222 229 230 231} + lappend rows {16 52 88 130 172 214 221 228 229 230 231} + lappend rows {16 52 94 136 178 220 227 227 228 230 231} + + lappend rows {16 58 100 142 184 226 227 228 228 230 231} + + lappend rows {16 22 64 106 148 190 227 228 229 230 231} + lappend rows {16 22 28 70 112 154 191 228 229 230 231} + lappend rows {16 22 28 34 76 118 155 192 229 230 231} + lappend rows {16 22 28 34 40 82 119 156 193 230 231} + lappend rows {16 22 28 34 40 46 83 120 157 194 231} + lappend rows {16 22 28 34 40 47 84 121 158 195 231} + lappend rows {16 22 28 34 41 48 85 122 158 195 231} + lappend rows {16 22 28 35 42 49 86 123 159 195 231} + lappend rows {16 22 29 36 43 50 87 123 159 195 231} + + lappend rows {16 23 30 37 44 51 87 123 159 195 231} + + lappend rows {16 17 24 31 38 45 87 123 159 195 231} + lappend rows {16 17 18 25 32 39 81 123 159 195 231} + lappend rows {16 17 18 19 26 33 75 117 159 195 231} + lappend rows {16 17 18 19 20 27 69 111 153 195 231} + lappend rows {16 17 18 19 20 21 63 105 147 189 231} + lappend rows {16 17 18 19 20 57 99 141 183 225 231} + lappend rows {16 17 18 19 56 93 135 177 219 225 231} + lappend rows {16 17 18 55 92 129 171 213 219 225 231} + lappend rows {16 17 54 91 128 165 207 213 219 225 231} + + lappend rows {16 53 90 127 164 201 207 213 219 225 231} + + lappend rows {16 52 89 126 163 200 207 213 219 225 231} + lappend rows {16 52 88 125 162 199 206 213 219 225 231} + lappend rows {16 52 88 124 161 198 205 212 219 225 231} + lappend rows {16 52 88 124 160 197 204 211 218 225 231} + + + set t [textblock::class::table new] + $t configure -show_seps 0 -show_edge 0 + foreach r $rows { + set rowcells [list] + foreach cnum $r { + if {$cnum in $fgwhite} { + set fg "web-white" + } else { + set fg "web-black" + } + lappend rowcells "[a+ {*}$fc $fg Term-$cnum][format %3s $cnum] " + } + $t add_row $rowcells + } + append out [$t print] + $t destroy + return $out + } + #24 greys of 256 + proc colourblock_24 {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + + set out "" + set fg "bold white" + for {set i 232} {$i <= 255} {incr i} { + if {$i > 243} { + set fg "web-black" + } + append out "[a+ {*}$fc {*}$fg Term$i][format %3s $i] " + } + return $out[a] + + } + proc colourtable_24_names {args} { + set defaults {-forcecolour 0} + set opts [tcl::dict::merge $defaults $args] + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + + variable TERM_colour_map_reverse + set rows [list] + set row [list] + set fg "web-white" + set t [textblock::class::table new] + $t configure -show_hseps 0 -show_edge 0 + for {set i 232} {$i <=255} {incr i} { + set cname [tcl::dict::get $TERM_colour_map_reverse $i] ;#use term-cname etc instead of term$i - may as well let a+ cache the call by name as the preferred? option + if {[llength $row]== 8} { + lappend rows $row + set row [list] + } + if {$i > 243} { + set fg "web-black" + } + lappend row "[a+ {*}$fc {*}$fg Term-$cname][format %3s $i] $cname " + } + lappend rows $row + foreach r $rows { + $t add_row $r + } + append out [$t print] + $t destroy + append out [a] + return [tcl::string::trimleft $out \n] + + } + #set WEB_colour_map [tcl::dict::merge\ + # $WEB_colour_map_basic\ + # $WEB_colour_map_pink\ + # $WEB_colour_map_red\ + # $WEB_colour_map_orange\ + # $WEB_colour_map_yellow\ + # $WEB_colour_map_brown\ + # $WEB_colour_map_purple\ + # $WEB_colour_map_blue\ + # $WEB_colour_map_cyan\ + # $WEB_colour_map_green\ + # $WEB_colour_map_white\ + # $WEB_colour_map_gray\ + #] + proc colourtable_web {args} { + set opts {-forcecolour 0 -groups *} + foreach {k v} $args { + switch -- $k { + -groups - -forcecolour { + tcl::dict::set opts $k $v + } + default { + error "colourtable_web unrecognised option '$k'. Known-options: [tcl::dict::keys $defaults]" + } + } + } + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + set groups [tcl::dict::get $opts -groups] + + #set all_groupnames [list basic pink red orange yellow brown purple blue cyan green white gray] + set all_groupnames [list basic brown yellow red pink orange purple blue cyan green white gray] + switch -- $groups { + "" - * { + set show_groups $all_groupnames + } + ? { + return "Web group names: $all_groupnames" + } + default { + foreach g $groups { + if {$g ni $all_groupnames} { + error "colourtable_web group name '$g' not known. Known colour groups: $all_groupnames" + } + } + set show_groups $groups + } + } + set grouptables [list] + set white_fg_list [list\ + mediumvioletred deeppink\ + darkred red firebrick crimson indianred\ + orangered\ + maroon brown saddlebrown sienna\ + indigo purple darkmagenta darkviolet darkslateblue blueviolet darkorchid fuchsia magenta slateblue mediumslateblue\ + midnightblue navy darkblue mediumblue blue royalblue steelblue dodgerblue\ + teal darkcyan\ + darkgreen green darkolivegreen forestgreen seagreen olive olivedrab\ + black darkslategray dimgray slategray\ + ] + foreach g $show_groups { + #upvar WEB_colour_map_$g map_$g + variable WEB_colour_map_$g + set t [textblock::class::table new] + $t configure -show_edge 0 -show_seps 0 -show_header 1 + tcl::dict::for {cname cdec} [set WEB_colour_map_$g] { + $t add_row [list "$cname " "[colour_dec2hex $cdec] " $cdec] + if {$cname in $white_fg_list} { + set fg "web-white" + } else { + set fg "web-black" + } + #$t configure_row [expr {[$t row_count]-1}] -ansibase [a+ {*}$fc $fg Rgb-$cdec] + $t configure_row [expr {[$t row_count]-1}] -ansibase [a+ {*}$fc $fg Web-$cname] + } + $t configure -frametype {} + $t configure_column 0 -headers [list "[tcl::string::totitle $g] colours"] + $t configure_column 0 -header_colspans [list any] + $t configure -ansibase_header [a+ {*}$fc web-black Web-white] + lappend grouptables [$t print] + $t destroy + } + #set displaytable [textblock::class::table new] + set displaytable [textblock::list_as_table -columns 3 -return tableobject $grouptables] + $displaytable configure -show_header 0 -show_vseps 0 + #return $displaytable + set result [$displaytable print] + $displaytable destroy + return $result + } + proc colourtable_x11diff {args} { + variable X11_colour_map_diff + variable WEB_colour_map + set opts [tcl::dict::create\ + -forcecolour 0\ + -return "string"\ + ] + foreach {k v} $args { + switch -- $k { + -return - -forcecolour { + tcl::dict::set opts $k $v + } + default { + error "colourtable_x11diff unrecognised option '$k'. Known options [tcl::dict::keys $opts]" + } + } + } + set fc "" + if {[tcl::dict::get $opts -forcecolour]} { + set fc "forcecolour" + } + + set comparetables [list] ;# 2 side by side x11 and web + + # -- --- --- + set t [textblock::class::table new] + $t configure -show_edge 0 -show_seps 0 -show_header 1 + tcl::dict::for {cname cdec} [set X11_colour_map_diff] { + $t add_row [list "$cname " "[colour_dec2hex $cdec] " $cdec] + set fg "web-white" + $t configure_row [expr {[$t row_count]-1}] -ansibase [a+ {*}$fc $fg X11-$cname] + } + $t configure -frametype block + $t configure_column 0 -headers [list "X11"] + $t configure_column 0 -header_colspans [list any] + $t configure -ansibase_header [a+ {*}$fc web-black Web-white] + lappend comparetables [$t print] + $t destroy + # -- --- --- + + set WEB_map_subset [tcl::dict::create] + tcl::dict::for {k v} $X11_colour_map_diff { + tcl::dict::set WEB_map_subset $k [tcl::dict::get $WEB_colour_map $k] + } + + # -- --- --- + set t [textblock::class::table new] + $t configure -show_edge 0 -show_seps 0 -show_header 1 + tcl::dict::for {cname cdec} [set WEB_map_subset] { + $t add_row [list "$cname " "[colour_dec2hex $cdec] " $cdec] + set fg "web-white" + $t configure_row [expr {[$t row_count]-1}] -ansibase [a+ {*}$fc $fg Web-$cname] + } + $t configure -frametype block + $t configure_column 0 -headers [list "Web"] + $t configure_column 0 -header_colspans [list any] + $t configure -ansibase_header [a+ {*}$fc web-black Web-white] + lappend comparetables [$t print] + $t destroy + # -- --- --- + + set displaytable [textblock::list_as_table -columns 2 -return tableobject $comparetables] + $displaytable configure -show_header 0 -show_vseps 0 + + if {[tcl::dict::get $opts -return] eq "string"} { + set result [$displaytable print] + $displaytable destroy + return $result + } + + return $displaytable + } + proc a? {args} { + #*** !doctools + #[call [fun a?] [opt {ansicode...}]] + #[para]Return an ansi string representing a table of codes and a panel showing the colours + variable SGR_setting_map + variable SGR_colour_map + set fcposn [lsearch $args "forcecol*"] + set fc "" + set opt_forcecolour 0 + if {$fcposn >= 0} { + set fc "forcecolour" + set opt_forcecolour 1 + set args [lremove $args $fcposn] + } + + if {![llength $args]} { + set out "" + set indent " " + set RST [a] + append out "[a+ {*}$fc web-white]Extended underlines$RST" \n + set undercurly "undercurly \[a+ undercurly und-199-21-133\]text\[a] -> [a+ undercurly und-199-21-133]text$RST" + set underdotted "underdotted \[a+ underdotted und#FFD700\]text\[a] -> [a+ underdotted und#FFD700]text$RST" + set underdashed "underdashed \[a+ underdashed undt-45\]text\[a] -> [a+ underdashed undt-45]text$RST" + set underline_c "named terminal colour SGR underline \[a+ underline undt-deeppink1\]text\[a] -> [a+ underline undt-deeppink1]text$RST" + append out "${indent}$undercurly $underdotted" \n + append out "${indent}$underdashed" \n + append out "${indent}$underline_c" \n + append out "${indent}Extended underlines/colours can suppress other SGR codes on terminals that don't support them if codes are merged." \n + append out "${indent}punk::ansi tries to keep them in separate escape sequences (standard SGR followed by extended) even during merge operations to avoid this." \n + append out "${indent}If a fallback to standard underline is required, underline should be added along with extended codes such as underlinedotted, underlinedouble etc" \n + append out "${indent}e.g cyan with curly yellow underline or fallback all cyan underlined \[a+ cyan undercurly underline undt-yellow\]text\[a] -> [a+ {*}$fc cyan undercurly underline undt-yellow]text$RST" \n + append out "[a+ {*}$fc web-white]Standard SGR colours and attributes $RST" \n + set settings_applied $SGR_setting_map + set strmap [list] + #safe jumptable test + #dict for {k v} $SGR_setting_map {} + tcl::dict::for {k v} $SGR_setting_map { + switch -- $k { + bold - dim - italic - doubleunderline - blink - fastblink - strike - overline - framecircle { + lappend strmap " $k " " [a+ $k]$k$RST " + } + noreverse - nounderline { + #prefixed version will match before unprefixed - will not be subject to further replacement scanning + lappend strmap "$k" "[a+ $k]$k$RST" ;#could replace with self - but may as well put in punk::ansi::sgr_cache (can make cache a little neater to display) + } + underline - reverse - frame { + #1st coloumn - no leading space + lappend strmap "$k " "[a+ $k]$k$RST " + } + } + } + set settings_applied [tcl::string::trim $SGR_setting_map \n] + try { + package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try + package require textblock + + append out [textblock::join -- $indent [tcl::string::map $strmap $settings_applied]] \n + append out [textblock::join -- $indent [tcl::string::trim $SGR_colour_map \n]] \n + append out [textblock::join -- $indent "Example: \[a+ bold red White underline\]text\[a] -> [a+ bold red White underline]text[a]"] \n \n + set bgname "Web-white" + set map1 [colourmap1 -bg $bgname -forcecolour $opt_forcecolour] + set map1 [overtype::centre -transparent 1 $map1 "[a {*}$fc black $bgname]Standard colours[a]"] + set map2 [colourmap2 -bg $bgname -forcecolour $opt_forcecolour] + set map2 [overtype::centre -transparent 1 $map2 "[a {*}$fc black $bgname]High-intensity colours[a]"] + append out [textblock::join -- $indent [textblock::join -- $map1 $map2]] \n + append out "[a+ {*}$fc web-white]216 colours of 256 terminal colours (To see names, use: a? term ?pastel? ?rainbow?)[a]" \n + append out [textblock::join -- $indent [colourblock_216 -forcecolour $opt_forcecolour]] \n + append out "[a+ {*}$fc web-white]24 Greyscale colours[a]" \n + append out [textblock::join -- $indent [colourblock_24 -forcecolour $opt_forcecolour]] \n + append out \n + append out [textblock::join -- $indent "Example: \[a+ Term-92 term-49\]text\[a] -> [a+ {*}$fc Term-92 term-49]text[a]"] \n + append out [textblock::join -- $indent "Example: \[a+ Term-lightsteelblue term-gold1\]text\[a] -> [a+ {*}$fc Term-lightsteelblue term-gold1]text[a]"] \n + append out [textblock::join -- $indent "Example: \[a+ term-lightsteelblue Term-gold1\]text\[a] -> [a+ {*}$fc term-lightsteelblue Term-gold1]text[a]"] \n + append out \n + append out "[a+ {*}$fc web-white]16 Million colours[a]" \n + #tcl::dict::set WEB_colour_map mediumvioletred 199-21-133 ;# #C71585 + append out [textblock::join -- $indent "Example: \[a+ rgb-199-21-133\]text\[a] -> [a+ {*}$fc rgb-199-21-133]text[a]"] \n + append out [textblock::join -- $indent "Example: \[a+ Rgb#C71585\]text\[a] -> [a+ {*}$fc Rgb#C71585]text[a]"] \n + append out [textblock::join -- $indent "Examine a sequence: a? bold rgb-46-139-87 Rgb#C71585 "] \n + append out \n + append out "[a+ {*}$fc web-white]Web colours[a]" \n + append out [textblock::join -- $indent "To see all names use: a? web"] \n + append out [textblock::join -- $indent "To see specific colour groups use: a? web groupname1 groupname2..."] \n + append out [textblock::join -- $indent "Valid group names (can be listed in any order): basic pink red orange yellow brown purple blue cyan green white grey"] \n + append out \n + append out [textblock::join -- $indent "Example: \[a+ Web-springgreen web-crimson\]text\[a] -> [a+ {*}$fc Web-springgreen web-coral]text[a]"] \n + append out \n + append out "[a+ {*}$fc web-white]X11 colours[a] - mostly match Web colours" \n + append out [textblock::join -- $indent "To see differences: a? x11"] \n + + if {[tcl::info::exists ::punk::console::colour_disabled] && $::punk::console::colour_disabled} { + append out \n + if {$fc ne ""} { + append out "[a+ {*}$fc web-white]Colour is currently disabled - returning with colour anyway because 'forcecolour' argument was supplied[a]" \n + } else { + append out "Colour is currently disabled - to return with colour anyway - add the 'forcecolour' argument" \n + } + } + + } on error {result options} { + puts stderr "Failed to draw colourmap" + puts stderr "$result" + } finally { + return $out + } + } else { + switch -- [lindex $args 0] { + term { + set termargs [lrange $args 1 end] + foreach ta $termargs { + switch -- $ta { + pastel - rainbow {} + default {error "unrecognised term option '$ta'. Known values: pastel rainbow"} + } + } + set out "16 basic colours\n" + append out [colourtable_16_names -forcecolour $opt_forcecolour] \n + append out "216 colours\n" + append out [colourtable_216_names -forcecolour $opt_forcecolour] \n + append out "24 greyscale colours\n" + append out [colourtable_24_names -forcecolour $opt_forcecolour] + foreach ta $termargs { + switch -- $ta { + pastel { + append out \n + append out "Pastel Colour Space (punk::ansi::colourtable_term_pastel)\n" + append out [colourtable_term_pastel -forcecolour $opt_forcecolour] + } + rainbow { + append out \n + append out "Rainbow Colours (punk::ansi::colourtable_term_rainbow)\n" + append out [colourtable_term_rainbow -forcecolour $opt_forcecolour] + } + } + } + append out "\nNote: The 256 term colours especially 0-15 may be altered by terminal pallette settings or ansi OSC 4 codes, so specific RGB values are unavailable" + return $out + } + web { + return [colourtable_web -forcecolour $opt_forcecolour -groups [lrange $args 1 end]] + } + x11 { + set out "" + append out " Mostly same as web - known differences displayed" \n + append out [colourtable_x11diff -forcecolour $opt_forcecolour] + return $out + } + } + + variable WEB_colour_map + variable X11_colour_map + variable TERM_colour_map + variable TERM_colour_map_reverse + variable SGR_map + + set t [textblock::class::table new] + $t configure -show_edge 0 -show_seps 1 -show_header 0 + + set resultlist [list] + foreach i $args { + set f4 [tcl::string::range $i 0 3] + set s [a+ {*}$fc $i]sample + switch -- $f4 { + web- - Web- - WEB- { + set tail [tcl::string::tolower [tcl::string::trim [tcl::string::range $i 4 end] -]] + if {[tcl::dict::exists $WEB_colour_map $tail]} { + set dec [tcl::dict::get $WEB_colour_map $tail] + set hex [colour_dec2hex $dec] + set descr "$hex $dec" + } else { + set descr "UNKNOWN colour for web" + } + $t add_row [list $i $descr $s [ansistring VIEW $s]] + } + term - Term - undt { + set tail [tcl::string::trim [tcl::string::range $i 4 end] -] + if {[tcl::string::is integer -strict $tail]} { + if {$tail < 256} { + set descr "[tcl::dict::get $TERM_colour_map_reverse $tail]" + } else { + set descr "Invalid (> 255)" + } + } else { + set tail [tcl::string::tolower $tail] + if {[tcl::dict::exists $TERM_colour_map $tail]} { + set descr [tcl::dict::get $TERM_colour_map $tail] + } else { + set descr "UNKNOWN colour for term" + } + } + $t add_row [list $i $descr $s [ansistring VIEW $s]] + } + x11- - X11- { + set tail [tcl::string::tolower [tcl::string::trim [tcl::string::range $i 4 end] -]] + if {[tcl::dict::exists $X11_colour_map $tail]} { + set dec [tcl::dict::get $X11_colour_map $tail] + set hex [colour_dec2hex $dec] + set descr "$hex $dec" + } else { + set descr "UNKNOWN colour for x11" + } + $t add_row [list $i $descr $s [ansistring VIEW $s]] + } + rgb- - Rgb- - RGB- - + rgb0 - rgb1 - rgb2 - rgb3 - rgb4 - rgb5 - rgb6 - rgb7 - rgb8 - rgb9 - + Rgb0 - Rgb1 - Rgb2 - Rgb3 - Rgb4 - Rgb5 - Rgb6 - Rgb7 - Rgb8 - Rgb9 - + RGB0 - RGB1 - RGB2 - RGB3 - RGB4 - RGB5 - RGB6 - RGB7 - RGB8 - RGB9 - + rgb# - Rgb# - RGB# - + und# - und- { + if {[tcl::string::index $i 3] eq "#"} { + set tail [tcl::string::range $i 4 end] + set hex $tail + set dec [colour_hex2dec $hex] + set info $dec ;#show opposite type as first line of info col + } else { + set tail [tcl::string::trim [tcl::string::range $i 3 end] -] + set dec $tail + set hex [colour_dec2hex $dec] + set info $hex + } + + set webcolours_i [lsearch -all $WEB_colour_map $dec] + set webcolours [list] + foreach ci $webcolours_i { + lappend webcolours [lindex $WEB_colour_map $ci-1] + } + set x11colours [list] + set x11colours_i [lsearch -all $X11_colour_map $dec] + foreach ci $x11colours_i { + set c [lindex $X11_colour_map $ci-1] + if {$c ni $webcolours} { + lappend x11colours $c + } + } + foreach c $webcolours { + append info \n web-$c + } + foreach c $x11colours { + append info \n x11-$c + } + $t add_row [list $i "$info" $s [ansistring VIEW $s]] + } + unde { + switch -- $i { + undercurly - underdotted - underdashed - undersingle - underdouble { + $t add_row [list $i extended $s [ansistring VIEW $s]] + } + underline { + $t add_row [list $i "SGR 4" $s [ansistring VIEW $s]] + } + default { + $t add_row [list $i UNKNOWN $s [ansistring VIEW $s]] + } + } + } + default { + if {[tcl::string::is integer -strict $i]} { + set rmap [lreverse $SGR_map] + $t add_row [list $i "SGR [tcl::dict::get $rmap $i]" $s [ansistring VIEW $s]] + } else { + if {[tcl::dict::exists $SGR_map $i]} { + $t add_row [list $i "SGR [tcl::dict::get $SGR_map $i]" $s [ansistring VIEW $s]] + } else { + $t add_row [list $i UNKNOWN $s [ansistring VIEW $s]] + } + } + } + } + } + set ansi [a+ {*}$fc {*}$args] + set s ${ansi}sample + #set merged [punk::ansi::codetype::sgr_merge_singles [list $ansi]] + set merged [punk::ansi::codetype::sgr_merge [list $ansi]] + set s2 ${merged}sample + #lappend resultlist "RESULT: [a+ {*}$args]sample[a]" + $t add_row [list RESULT "" $s [ansistring VIEW $s]] + if {$ansi ne $merged} { + if {[tcl::string::length $merged] < [tcl::string::length $ansi]} { + #only refer to redundancies if shorter - merge may reorder - REVIEW + set warning "[a+ web-red Web-yellow]REDUNDANCIES FOUND" + } else { + set warning "" + } + $t add_row [list MERGED $warning $s2 [ansistring VIEW $s2]] + } + set result [$t print] + $t destroy + return $result + } + } + + #REVIEW! note that OSC 4 can change the 256 color pallette + #e.g \x1b\]4\;1\;#HHHHHH\x1b\\ + # (or with colour name instead of rgb #HHHHHH on for example wezterm) + + #Q: If we can't detect OSC 4 - how do we know when to invalidate/clear at least the 256 color portion of the cache? + #A: The cache values should still be valid - and the terminal should display the newly assigned colour. + # If in line mode - perhaps readline or something else is somehow storing the rgb values and replaying invalid colours? + # On wezterm - we can get cells changing colour as we scroll after a pallette change - so something appears to be caching colours + + variable sgr_cache + set sgr_cache [tcl::dict::create] + + #sgr_cache clear called by punk::console::ansi when set to off + proc sgr_cache {args} { + set argd [punk::args::get_dict { + *proc -name punk::ansi::sgr_cache -help "Convenience function to view and optionally clear the ansi character attribute cache (ansi SGR codes) + " + -action -default "" -choices "clear" -help "-action clear will unset the keys in the punk::ansi::sgr_cache dict + This is called automatically when setting 'colour false' in the console" + + -pretty -default 1 -type boolean -help "use 'pdict punk::ansi::sgr_cache */%str,%ansiview' output" + *values -min 0 -max 0 + } $args] + set action [dict get $argd opts -action] + set pretty [dict get $argd opts -pretty] + + variable sgr_cache + if {$action eq "clear"} { + set sgr_cache [tcl::dict::create] + return "sgr_cache cleared" + } + if {$pretty} { + #return [pdict -channel none sgr_cache */%str,%ansiview] + return [pdict -channel none sgr_cache */%rpadstr-"sample",%ansiviewstyle] + } + + if {[catch { + set termwidth [tcl::dict::get [punk::console::get_size] columns] + } errM]} { + set termwidth 80 + } + set termwidth [expr [$termwidth -3]] + set out "" + set linelen 0 + set RST [a] + set lines [list] + set line "" + #todo - terminal width? table? + tcl::dict::for {key ansi} $sgr_cache { + set thislen [expr {[tcl::string::length $key]+1}] + if {$linelen + $thislen >= $termwidth-1} { + lappend lines $line + set line "$ansi$key$RST " + set linelen $thislen + } else { + append line "$ansi$key$RST " + incr linelen $thislen + } + } + if {[tcl::string::length $line]} { + lappend lines $line + } + return [join $lines \n] + } + + proc a+ {args} { + #*** !doctools + #[call [fun a+] [opt {ansicode...}]] + #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first + #[para] e.g to set foreground red and bold + #[para]punk::ansi::a red bold + #[para]to set background red + #[para]punk::ansi::a Red + #[para]see [cmd punk::ansi::a?] to display a list of codes + + #function name part of cache-key because a and a+ return slightly different results (a has leading reset) + variable sgr_cache + set cache_key "a+ $args" ;#ensure cache_key static - we may remove for example 'forcecolour' from args - but it needs to remain part of cache_key + if {[tcl::dict::exists $sgr_cache $cache_key]} { + return [tcl::dict::get $sgr_cache $cache_key] + } + + #don't disable ansi here. + #we want this to be available to call even if ansi is off + variable WEB_colour_map + variable TERM_colour_map + + + set colour_disabled 0 + #whatever function disables or re-enables colour should have made a call to punk::ansi::sgr_cache clear + if {[tcl::info::exists ::punk::console::colour_disabled] && $::punk::console::colour_disabled} { + set colour_disabled 1 + } + #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. + set forcecolour 0 + set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + if {$fcpos >= 0} { + set forcecolour 1 + set args [lremove $args $fcpos] + } + + set t [list] + set e [list] ;#extended codes needing to go in own escape sequence + foreach i $args { + set f4 [tcl::string::range $i 0 3] + switch -- $f4 { + web- { + #variable WEB_colour_map + #upvar ::punk::ansi::WEB_colour_map WEB_colour_map + #foreground web colour + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $WEB_colour_map $cname]} { + set rgbdash [tcl::dict::get $WEB_colour_map $cname] + set rgb [tcl::string::map { - ;} $rgbdash] + lappend t "38;2;$rgb" + } else { + puts stderr "ansi web colour unmatched: '$i' in call 'a+ $args'" + } + } + Web- - WEB- { + #variable WEB_colour_map + #upvar ::punk::ansi::WEB_colour_map WEB_colour_map + #background web colour + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $WEB_colour_map $cname]} { + lappend t "48;2;[tcl::string::map {- ;} [tcl::dict::get $WEB_colour_map $cname]]" + } else { + puts stderr "ansi Web colour unmatched: '$i' in call 'a+ $args'" + } + } + rese {lappend t 0 ;#reset} + bold {lappend t 1} + dim {lappend t 2} + blin { + #blink + lappend t 5 + } + fast { + #fastblink + lappend t 6 + } + nobl { + #noblink + lappend t 25 + } + hide {lappend t 8} + norm {lappend t 22 ;#normal} + unde { + #TODO - fix + # extended codes with colon suppress normal SGR attributes when in same escape sequence on terminal that don't support the extended codes. + # need to emit in + switch -- $i { + underline { + lappend t 4 ;#underline + } + underextendedoff { + #lremove any existing 4:1 etc + #NOTE struct::set result order can differ depending on whether tcl/critcl imp used + #set e [struct::set difference $e [list 4:1 4:2 4:3 4:4 4:5]] + set e [punk::lib::ldiff $e [list 4:1 4:2 4:3 4:4 4:5]] + lappend e 4:0 + } + undersingle { + lappend e 4:1 + } + underdouble { + lappend e 4:2 + } + undercurly { + lappend e 4:3 + } + underdotted { + lappend e 4:4 + } + underdashed { + lappend e 4:5 + } + default { + puts stderr "ansi term unmatched: unde* '$i' in call 'a $args' (underline,undersingle,underdouble,undercurly,underdotted,underdashed)" + } + } + } + doub {lappend t 21 ;#doubleunderline} + noun { + lappend t 24 ;#nounderline + #set e [struct::set difference $e [list 4:1 4:2 4:3 4:4 4:5]] + lappend e 4:0 + } + stri {lappend t 9 ;#strike} + nost {lappend t 29 ;#nostrike} + ital {lappend t 3 ;#italic} + noit {lappend t 23 ;#noitalic} + reve {lappend t 7 ;#reverse} + nore {lappend t 27 ;#noreverse} + defa { + switch -- $i { + defaultfg { + lappend t 39 + } + defaultbg { + lappend t 49 + } + defaultund { + lappend t 59 + } + default { + puts stderr "ansi term unmatched: defa* '$i' in call 'a $args' (defaultfg,defaultbg,defaultund)" + } + } + } + nohi {lappend t 28 ;#nohide} + over {lappend t 53 ;#overline} + noov {lappend t 55 ;#nooverline} + fram { + if {$i eq "frame"} { + lappend t 51 ;#frame + } else { + lappend t 52 ;#framecircle + } + } + nofr {lappend t 54 ;#noframe} + blac {lappend t 30 ;#black} + red {lappend t 31} + gree {lappend t 32 ;#green} + yell {lappend t 33 ;#yellow} + blue {lappend t 34} + purp {lappend t 35 ;#purple} + cyan {lappend t 36} + whit {lappend t 37 ;#white} + Blac {lappend t 40 ;#Black} + Red {lappend t 41} + Gree {lappend t 42 ;#Green} + Yell {lappend t 43 ;#Yellow} + Blue {lappend t 44} + Purp {lappend t 45 ;#Purple} + Cyan {lappend t 46} + Whit {lappend t 47 ;#White} + brig { + switch -- $i { + brightblack {lappend t 90} + brightred {lappend t 91} + brightgreen {lappend t 92} + brightyellow {lappend t 93} + brightblue {lappend t 94} + brightpurple {lappend t 95} + brightcyan {lappend t 96} + brightwhite {lappend t 97} + } + } + Brig { + switch -- $i { + Brightblack {lappend t 100} + Brightred {lappend t 101} + Brightgreen {lappend t 102} + Brightyellow {lappend t 103} + Brightblue {lappend t 104} + Brightpurple {lappend t 105} + Brightcyan {lappend t 106} + Brightwhite {lappend t 107} + } + } + term { + #variable TERM_colour_map + #256 colour foreground by Xterm name or by integer + #name is xterm name or colour index from 0 - 255 + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] & $cc < 256} { + lappend t "38;5;$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend t "38;5;[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi term colour unmatched: '$i' in call 'a+ $args'" + } + } + } + Term - TERM { + #variable TERM_colour_map + #256 colour background by Xterm name or by integer + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] && $cc < 256} { + lappend t "48;5;$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend t "48;5;[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi Term colour unmatched: '$i' in call 'a+ $args'" + } + } + } + rgb- - rgb0 - rgb1 - rgb2 - rgb3 - rgb4 - rgb5 - rgb6 - rgb7 - rgb8 - rgb9 { + #decimal rgb foreground + #allow variants rgb-xxx-xxx-xxx and rgbxxx-xxx-xxx + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {;} , {;}] $rgbspec] + lappend t "38;2;$rgb" + } + Rgb- - RGB- - Rgb0 - Rgb1 - Rgb2 - Rgb3 - Rgb4 - Rgb5 - Rgb6 - Rgb7 - Rgb8 - Rgb9 - RGB0 - RGB1 - RGB2 - RGB3 - RGB4 - RGB5 - RGB6 - RGB7 - RGB8 - RGB9 { + #decimal rgb background + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {;} , {;}] $rgbspec] + lappend t "48;2;$rgb" + } + "rgb#" { + #hex rgb foreground + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {;}] + lappend t "38;2;$rgb" + } + "Rgb#" - "RGB#" { + #hex rgb background + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {;}] + lappend t "48;2;$rgb" + } + und- - und0 - und1 - und2 - und3 - und4 - und5 - und6 - und7 - und8 - und9 { + #decimal rgb underline + #allow variants und-xxx-xxx-xxx and undxxx-xxx-xxx + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {:} , {:}] $rgbspec] + lappend e "58:2::$rgb" + } + "und#" { + #hex rgb underline - (e.g kitty, wezterm) - uses colons as separators + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {:}] + lappend e "58:2::$rgb" + } + undt { + #variable TERM_colour_map + #256 colour underline by Xterm name or by integer + #name is xterm name or colour index from 0 - 255 + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] & $cc < 256} { + lappend e "58:5:$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend e "58:5:[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi term underline colour unmatched: '$i' in call 'a $args'" + } + } + } + x11- { + variable X11_colour_map + #foreground X11 names + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $X11_colour_map $cname]} { + set rgbdash [tcl::dict::get $X11_colour_map $cname] + set rgb [tcl::string::map [list - {;}] $rgbdash] + lappend t "38;2;$rgb" + } else { + puts stderr "ansi x11 colour unmatched: '$i' in call 'a+ $args'" + } + } + X11- { + variable X11_colour_map + #background X11 names + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $X11_colour_map $cname]} { + set rgbdash [tcl::dict::get $X11_colour_map $cname] + set rgb [tcl::string::map [list - {;}] $rgbdash] + lappend t "48;2;$rgb" + } else { + puts stderr "ansi X11 colour unmatched: '$i'" + } + } + default { + if {[tcl::string::is integer -strict $i] || [tcl::string::first ";" $i] > 0} { + lappend t $i + } elseif {[tcl::string::first : $i] > 0} { + lappend e $i + } else { + puts stderr "ansi name unmatched: '$i' in call 'a+ $args' Perhaps missing prefix? e.g web- x11- term- rgb# rgb-" + } + } + } + } + + #the performance penalty must not be placed on the standard colour_enabled path. + #This is punk. Colour is the happy path despite the costs. + #The no_color users will still get a performance boost from shorter string processing if that's one of their motivations. + #As no_color doesn't strip all ansi - the motivation for it should not generally be + if {$colour_disabled && !$forcecolour} { + set tkeep [list] + foreach code $t { + switch -- $code { + 0 - 1 - 2 - 3 - 23 - 4 - 21 - 24 - 5 - 6 - 25 - 7 - 27 - 8 - 28 - 9 - 29 - 22 - 39 - 49 - 53 - 55 - 51 - 52 - 54 - 59 { + #SGR underline and other non colour effects + lappend tkeep $code + } + } + } + set t $tkeep + set ekeep [list] + foreach code $e { + switch -- $code { + 4:0 - 4:1 - 4:2 - 4:3 - 4:4 - 4:5 { + lappend ekeep $code + } + } + } + set e $ekeep + } + + # \033 - octal. equivalently \x1b in hex which is more common in documentation + if {![llength $t]} { + if {![llength $e]} { + set result "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) + } else { + set result "\x1b\[[join $e {;}]m" + } + } else { + if {![llength $e]} { + set result "\x1b\[[join $t {;}]m" + } else { + set result "\x1b\[[join $t {;}]m\x1b\[[join $e {;}]m" + } + } + tcl::dict::set sgr_cache $cache_key $result + return $result + } + + proc a {args} { + #*** !doctools + #[call [fun a] [opt {ansicode...}]] + #[para]Returns the ansi code to reset any current settings and apply those from the supplied list + #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text + #[para] e.g to set foreground red and bold + #[para]punk::ansi::a red bold + #[para]to set background red + #[para]punk::ansi::a Red + #[para]see [cmd punk::ansi::a?] to display a list of codes + + #It's important to put the functionname in the cache-key because a and a+ return slightly different results + variable sgr_cache + set cache_key "a $args" + if {[tcl::dict::exists $sgr_cache $cache_key]} { + return [tcl::dict::get $sgr_cache $cache_key] + } + + #don't disable ansi here. + #we want this to be available to call even if ansi is off + variable WEB_colour_map + variable TERM_colour_map + + set colour_disabled 0 + #whatever function disables or re-enables colour should have made a call to punk::ansi::sgr_cache -action clear + if {[tcl::info::exists ::punk::console::colour_disabled] && $::punk::console::colour_disabled} { + set colour_disabled 1 + } + #allow a mechanism to override the colour_disabled terminal preference - for code that is generating colour for something else - don't let no_color ruin everything. + set forcecolour 0 + set fcpos [lsearch $args "forcecol*"] ;#allow forcecolor forcecolour + if {$fcpos >=0} { + set forcecolour 1 + set args [lremove $args $fcpos] + } + + set t [list] + set e [list] ;#extended codes will suppress standard SGR colours and attributes if merged in same escape sequence + foreach i $args { + set f4 [tcl::string::range $i 0 3] + switch -- $f4 { + web- { + #variable WEB_colour_map + #upvar ::punk::ansi::WEB_colour_map WEB_colour_map + #foreground web colour + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $WEB_colour_map $cname]} { + set rgbdash [tcl::dict::get $WEB_colour_map $cname] + set rgb [tcl::string::map { - ;} $rgbdash] + lappend t "38;2;$rgb" + } else { + puts stderr "ansi web colour unmatched: '$i' in call 'a $args'" + } + } + Web- - WEB- { + #variable WEB_colour_map + #upvar ::punk::ansi::WEB_colour_map WEB_colour_map + #background web colour + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $WEB_colour_map $cname]} { + lappend t "48;2;[tcl::string::map {- ;} [tcl::dict::get $WEB_colour_map $cname]]" + } else { + puts stderr "ansi Web colour unmatched: '$i' in call 'a $args'" + } + } + rese {lappend t 0 ;#reset} + bold {lappend t 1} + dim {lappend t 2} + blin { + #blink + lappend t 5 + } + fast { + #fastblink + lappend t 6 + } + nobl { + #noblink + lappend t 25 + } + hide {lappend t 8} + norm {lappend t 22 ;#normal} + unde { + switch -- $i { + underline { + lappend t 4 ;#underline + } + underextendedoff { + #lremove any existing 4:1 etc + #use of struct::set with critcl is fast, but will reorder e (with differences depending on tcl vs critcl) + #set e [struct::set difference $e [list 4:1 4:2 4:3 4:4 4:5]] + set e [punk::lib::ldiff $e [list 4:1 4:2 4:3 4:4 4:5]] + lappend e 4:0 + } + undersingle { + lappend e 4:1 + } + underdouble { + lappend e 4:2 + } + undercurly { + lappend e 4:3 + } + underdotted { + lappend e 4:4 + } + underdashed { + lappend e 4:5 + } + default { + puts stderr "ansi term unmatched: unde* '$i' in call 'a $args' (underline,undersingle,underdouble,undercurly,underdotted,underdashed)" + } + } + } + doub {lappend t 21 ;#doubleunderline} + noun { + lappend t 24 ;#nounderline + #set e [struct::set difference $e [list 4:1 4:2 4:3 4:4 4:5]] + lappend e 4:0 + } + stri {lappend t 9 ;#strike} + nost {lappend t 29 ;#nostrike} + ital {lappend t 3 ;#italic} + noit {lappend t 23 ;#noitalic} + reve {lappend t 7 ;#reverse} + nore {lappend t 27 ;#noreverse} + defa { + switch -- $i { + defaultfg { + lappend t 39 + } + defaultbg { + lappend t 49 + } + defaultund { + lappend t 59 + } + default { + puts stderr "ansi term unmatched: defa* '$i' in call 'a $args' (defaultfg,defaultbg,defaultund)" + } + } + } + nohi {lappend t 28 ;#nohide} + over {lappend t 53 ;#overline} + noov {lappend t 55 ;#nooverline} + fram { + if {$i eq "frame"} { + lappend t 51 ;#frame + } else { + lappend t 52 ;#framecircle + } + } + nofr {lappend t 54 ;#noframe} + blac {lappend t 30 ;#black} + red {lappend t 31} + gree {lappend t 32 ;#green} + yell {lappend t 33 ;#yellow} + blue {lappend t 34} + purp {lappend t 35 ;#purple} + cyan {lappend t 36} + whit {lappend t 37 ;#white} + Blac {lappend t 40 ;#Black} + Red {lappend t 41} + Gree {lappend t 42 ;#Green} + Yell {lappend t 43 ;#Yellow} + Blue {lappend t 44} + Purp {lappend t 45 ;#Purple} + Cyan {lappend t 46} + Whit {lappend t 47 ;#White} + brig { + switch -- $i { + brightblack {lappend t 90} + brightred {lappend t 91} + brightgreen {lappend t 92} + brightyellow {lappend t 93} + brightblue {lappend t 94} + brightpurple {lappend t 95} + brightcyan {lappend t 96} + brightwhite {lappend t 97} + } + } + Brig { + switch -- $i { + Brightblack {lappend t 100} + Brightred {lappend t 101} + Brightgreen {lappend t 102} + Brightyellow {lappend t 103} + Brightblue {lappend t 104} + Brightpurple {lappend t 105} + Brightcyan {lappend t 106} + Brightwhite {lappend t 107} + } + } + term { + #variable TERM_colour_map + #256 colour foreground by Xterm name or by integer + #name is xterm name or colour index from 0 - 255 + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] & $cc < 256} { + lappend t "38;5;$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend t "38;5;[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi term colour unmatched: '$i' in call 'a $args'" + } + } + } + Term - TERM { + #variable TERM_colour_map + #256 colour background by Xterm name or by integer + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] && $cc < 256} { + lappend t "48;5;$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend t "48;5;[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi Term colour unmatched: '$i' in call 'a $args'" + } + } + } + rgb- - rgb0 - rgb1 - rgb2 - rgb3 - rgb4 - rgb5 - rgb6 - rgb7 - rgb8 - rgb9 { + #decimal rgb foreground + #allow variants rgb-xxx-xxx-xxx and rgbxxx-xxx-xxx + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {;} , {;}] $rgbspec] + lappend t "38;2;$rgb" + } + Rgb- - RGB- - Rgb0 - Rgb1 - Rgb2 - Rgb3 - Rgb4 - Rgb5 - Rgb6 - Rgb7 - Rgb8 - Rgb9 - RGB0 - RGB1 - RGB2 - RGB3 - RGB4 - RGB5 - RGB6 - RGB7 - RGB8 - RGB9 { + #decimal rgb background + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {;} , {;}] $rgbspec] + lappend t "48;2;$rgb" + } + "rgb#" { + #hex rgb foreground + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {;}] + lappend t "38;2;$rgb" + } + "Rgb#" - "RGB#" { + #hex rgb background + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {;}] + lappend t "48;2;$rgb" + } + und- - und0 - und1 - und2 - und3 - und4 - und5 - und6 - und7 - und8 - und9 { + #decimal rgb underline + #allow variants und-xxx-xxx-xxx and undxxx-xxx-xxx + set rgbspec [tcl::string::trim [tcl::string::range $i 3 end] -] + set rgb [tcl::string::map [list - {:} , {:}] $rgbspec] + lappend e "58:2::$rgb" + } + "und#" { + #hex rgb underline - (e.g kitty, wezterm) - uses colons as separators + set hex6 [tcl::string::trim [tcl::string::range $i 4 end] -] + set rgb [join [::scan $hex6 %2X%2X%2X] {:}] + lappend e "58:2::$rgb" + } + undt { + #variable TERM_colour_map + #256 colour underline by Xterm name or by integer + #name is xterm name or colour index from 0 - 255 + set cc [tcl::string::trim [tcl::string::tolower [tcl::string::range $i 4 end]] -] + if {[tcl::string::is integer -strict $cc] & $cc < 256} { + lappend e "58:5:$cc" + } else { + if {[tcl::dict::exists $TERM_colour_map $cc]} { + lappend e "58:5:[tcl::dict::get $TERM_colour_map $cc]" + } else { + puts stderr "ansi term underline colour unmatched: '$i' in call 'a $args'" + } + } + } + x11- { + variable X11_colour_map + #foreground X11 names + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $X11_colour_map $cname]} { + set rgbdash [tcl::dict::get $X11_colour_map $cname] + set rgb [tcl::string::map [list - {;}] $rgbdash] + lappend t "38;2;$rgb" + } else { + puts stderr "ansi x11 colour unmatched: '$i'" + } + } + X11- { + variable X11_colour_map + #background X11 names + set cname [tcl::string::tolower [tcl::string::range $i 4 end]] + if {[tcl::dict::exists $X11_colour_map $cname]} { + set rgbdash [tcl::dict::get $X11_colour_map $cname] + set rgb [tcl::string::map [list - {;}] $rgbdash] + lappend t "48;2;$rgb" + } else { + puts stderr "ansi X11 colour unmatched: '$i'" + } + } + default { + if {[tcl::string::is integer -strict $i] || [tcl::string::first ";" $i] > 0} { + lappend t $i + } elseif {[tcl::string::first : $i] > 0} { + lappend e $i + } else { + puts stderr "ansi name unmatched: '$i' in call 'a $args' Perhaps missing prefix? e.g web- x11- term- rgb# rgb-" + } + } + } + } + + if {$colour_disabled && !$forcecolour} { + set tkeep [list] + foreach code $t { + switch -- $code { + 0 - 1 - 2 - 3 - 23 - 4 - 21 - 24 - 5 - 6 - 25 - 7 - 27 - 8 - 28 - 9 - 29 - 22 - 39 - 49 - 53 - 55 - 51 - 52 - 54 - 59 { + #SGR underline and other non colour effects + lappend tkeep $code + } + } + } + set t $tkeep + set ekeep [list] + foreach code $e { + switch -- $code { + 4:0 - 4:1 - 4:2 - 4:3 - 4:4 - 4:5 { + lappend ekeep $code + } + } + } + set e $ekeep + } + + # \033 - octal. equivalently \x1b in hex which is more common in documentation + # empty list [a] should do reset - same for [a nonexistant] + # explicit reset at beginning of parameter list for a= (as opposed to a+) + set t [linsert $t[unset t] 0 0] + if {![llength $e]} { + set result "\x1b\[[join $t {;}]m" + } else { + set result "\x1b\[[join $t {;}]m\x1b\[[join $e {;}]m" + } + tcl::dict::set sgr_cache $cache_key $result + return $result + } + + proc ansiwrap {codes text} { + return [a {*}$codes]$text[a] + } + + + + proc get_code_name {code} { + #*** !doctools + #[call [fun get_code_name] [arg code]] + #[para]for example + #[para] get_code_name red will return 31 + #[para] get_code_name 31 will return red + variable SGR_map + set res [list] + foreach i [split $code ";"] { + set ix [lsearch -exact $SGR_map $i] + if {[tcl::string::is digit -strict $code]} { + if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} + } else { + #reverse lookup code from name + if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} + } + } + set res + } + proc reset {} { + #*** !doctools + #[call [fun reset]] + #[para]reset console + return "\x1bc" + } + proc reset_soft {} { + #*** !doctools + #[call [fun reset_soft]] + return \x1b\[!p + } + proc reset_colour {} { + #*** !doctools + #[call [fun reset_colour]] + #[para]reset colour only + return "\x1b\[0m" + } + + # -- --- --- --- --- + proc clear {} { + #*** !doctools + #[call [fun clear]] + return "\033\[2J" + } + proc clear_above {} { + #*** !doctools + #[call [fun clear_above]] + return \033\[1J + } + proc clear_below {} { + #*** !doctools + #[call [fun clear_below]] + return \033\[0J + } + + proc clear_all {} { + # - doesn't work?? + return \033\[3J + } + #see also erase_ functions + # -- --- --- --- --- + + proc cursor_on {} { + #*** !doctools + #[call [fun cursor_on]] + return "\033\[?25h" + } + proc cursor_off {} { + #*** !doctools + #[call [fun cursor_off]] + return "\033\[?25l" + } + + # -- --- --- --- --- + proc move {row col} { + #*** !doctools + #[call [fun move] [arg row] [arg col]] + #[para]Return an ansi sequence to move to row,col + #[para]aka cursor home + return \033\[${row}\;${col}H + } + proc move_emit {row col data args} { + #*** !doctools + #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] + #[para]Return an ansi string representing a move to row col with data appended + #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points + #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout + #[para]punk::console::move_emit_return will also return the cursor to the original position + #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. + #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. + #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin + #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. + #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: + #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] + #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. + + set out "" + if {$row eq "this"} { + append out \033\[\;${col}G$data + } else { + append out \033\[${row}\;${col}H$data + } + foreach {row col data} $args { + if {$row eq "this"} { + append out \033\[\;${col}G$data + } else { + append out \033\[${row}\;${col}H$data + } + } + return $out + } + proc move_forward {{n 1}} { + #*** !doctools + #[call [fun move_forward] [arg n]] + return \033\[${n}C + } + proc move_back {{n 1}} { + #*** !doctools + #[call [fun move_back] [arg n]] + return \033\[${n}D + } + proc move_up {{n 1}} { + #*** !doctools + #[call [fun move_up] [arg n]] + return \033\[${n}A + } + proc move_down {{n 1}} { + #*** !doctools + #[call [fun move_down] [arg n]] + return \033\[${n}B + } + proc move_column {col} { + #*** !doctools + #[call [fun move_column] [arg col]] + return \x1b\[${col}G + } + proc move_row {row} { + #*** !doctools + #[call [fun move_row] [arg row]] + #[para]VPA - Vertical Line Position Absolute + return \x1b\[${row}d + } + # -- --- --- --- --- + + proc cursor_save {} { + #*** !doctools + #[call [fun cursor_save]] + #[para] equivalent term::ansi::code::ctrl::sc + #[para] This is the ANSI/SCO cursor save as opposed to the DECSC version + #[para] On many terminals either will work - but cursor_save_dec is shorter and perhaps more widely supported + return \x1b\[s + } + proc cursor_restore {} { + #*** !doctools + #[call [fun cursor_restore]] + #[para] equivalent term::ansi::code::ctrl::rc + #[para] ANSI/SCO - see also cursor_restore_dec for the DECRC version + return \x1b\[u + } + proc cursor_save_dec {} { + #*** !doctools + #[call [fun cursor_save_dec]] + #[para] equivalent term::ansi::code::ctrl::sca + #[para] DECSC + return \x1b7 + } + proc cursor_restore_dec {} { + #*** !doctools + #[call [fun cursor_restore_attributes]] + #[para] equivalent term::ansi::code::ctrl::rca + #[para] DECRC + return \x1b8 + } + # -- --- --- --- --- + + #DECAWM - automatic line wrapping + proc enable_line_wrap {} { + #*** !doctools + #[call [fun enable_line_wrap]] + #[para] enable automatic line wrapping when characters entered beyond rightmost column + #[para] This will also allow forward movements to move to subsequent lines + #[para] This is DECAWM - and is the same sequence output by 'tput smam' + return \x1b\[?7h + } + proc disable_line_wrap {} { + #*** !doctools + #[call [fun disable_line_wrap]] + #[para] disable automatic line wrapping + #[para] reset DECAWM - same sequence output by 'tput rmam' + #tput rmam + return \x1b\[?7l + } + proc query_mode_line_wrap {} { + #*** !doctools + #[call [fun query_mode_line_wrap]] + #[para] DECRQM to query line-wrap state + #[para] The punk::ansi::query_mode_ functions just emit the ansi query sequence. + return \x1b\[?7\$p + } + #DECRPM responses e.g: + # \x1b\[?7\;1\$y + # \x1b\[?7\;2\$y + #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) + + + #Alt screen buffer + proc enable_alt_screen {} { + #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? + #\x1b\[?1049h ;#xterm + return \x1b\[?47h + } + proc disable_alt_screen {} { + #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] + #\x1b\[?1049l + return \x1b\[?47l + } + + # -- --- --- + + proc erase_line {} { + #*** !doctools + #[call [fun erase_line]] + return \033\[2K + } + proc erase_sol {} { + #*** !doctools + #[call [fun erase_sol]] + #[para]Erase to start of line, leaving cursor position alone. + return \033\[1K + } + proc erase_eol {} { + #*** !doctools + #[call [fun erase_eol]] + return \033\[K + } + #see also clear_above clear_below + # -- --- --- --- --- + + proc scroll_up {n} { + #*** !doctools + #[call [fun scroll_up] [arg n]] + return \x1b\[${n}S + } + proc scroll_down {n} { + #*** !doctools + #[call [fun scroll_down] [arg n]] + return \x1b\[${n}T + } + + proc insert_spaces {count} { + #*** !doctools + #[call [fun insert_spaces] [arg count]] + return \x1b\[${count}@ + } + proc delete_characters {count} { + #*** !doctools + #[call [fun delete_characters] [arg count]] + return \x1b\[${count}P + } + proc erase_characters {count} { + #*** !doctools + #[call [fun erase_characters] [arg count]] + return \x1b\[${count}X + } + proc insert_lines {count} { + #*** !doctools + #[call [fun insert_lines] [arg count]] + return \x1b\[${count}L + } + proc delete_lines {count} { + #*** !doctools + #[call [fun delete_lines] [arg count]] + return \x1b\[${count}M + } + + proc cursor_pos {} { + #*** !doctools + #[call [fun cursor_pos]] + #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin + #[para]The output on screen will look something like ^[lb][lb]47;3R + #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. + #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. + #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list + return \033\[6n + } + + proc cursor_pos_extended {} { + #includes page e.g ^[[47;3;1R + return \033\[?6n + } + + + #DECFRA - Fill rectangular area + #REVIEW - vt100 accepts decimal values 132-126 and 160-255 ("in the current GL or GR in-use table") + #some modern terminals accept and display characters outside this range - but this needs investigation. + #in a modern unicode era - the restricted range doesn't make a lot of sense - but we need to see what terminal emulators actually do. + #e.g what happens with double-width? + #this wrapper accepts a char rather than a decimal value + proc fill_rect {char t l b r} { + set dec [scan $char %c] + return \x1b\[$dec\;$t\;$l\;$b\;$r\$x + } + #DECFRA with decimal char value + proc fill_rect_dec {decimal t l b r} { + return \x1b\[$decimal\;$t\;$l\;$b\;$r\$x + } + + proc checksum_rect {id page t l b r} { + return "\x1b\[${id}\;${page}\;$t\;$l\;$b\;$r*y" + } + + proc request_cursor_information {} { + #*** !doctools + #[call [fun request_cursor_information]] + #[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report + #[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin + #[para]A stdin readloop will need to be in place to read this information + return \x1b\[1\$w + } + proc request_tabstops {} { + #*** !doctools + #[call [fun request_tabstops]] + #[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report + #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin + return \x1b\[2\$w + } + proc set_tabstop {} { + return \x1bH + } + proc clear_tabstop {} { + return \x1b\[g + } + proc clear_all_tabstops {} { + return \x1b\[3g + } + + + #alternative to string terminator is \007 - + proc titleset {windowtitle} { + #*** !doctools + #[call [fun titleset] [arg windowtitles]] + #[para]Returns the code to set the title of the terminal window to windowtitle + #[para]This may not work on terminals which have multiple panes/windows + return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives + } + #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title + #no cross-platform ansi-only mechanism ? + + proc test_decaln {} { + #Screen Alignment Test + #Reset margins, move cursor to the top left, and fill the screen with 'E' + #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) + return \x1b#8 + } + + #length of text for printing characters only + #- unicode and other non-printing chars and combining sequences should be handled by the ansifreestring_width call at the end. + #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names + #review - is there an existing library or better method? printing to a terminal and querying cursor position is relatively slow and terminals lie. + #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first + #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. + proc printing_length {line} { + #string last faster than string first for long strings anyway + if {[tcl::string::last \n $line] >= 0} { + error "line_print_length must not contain newline characters" + } + #what if line has \v (vertical tab) ie more than one logical screen line? + + #review - detect ansi moves and warn/error? They would invalidate this algorithm + #for a string with ansi moves - we would need to use the overtype::renderline function (which is a bit heavier) + #arguably - \b and \r are cursor move operations too - so processing them here is not very symmetrical - review + #the purpose of backspace (or line cr) in embedded text is unclear. Should it allow some sort of character combining/overstrike as it has sometimes done historically (nroff/less)? e.g a\b` as an alternative combiner or bolding if same char + #This should presumably only be done if the over_strike (os) capability is enabled in the terminal. Either way - it presumably won't affect printing width? + set line [punk::ansi::ansistrip $line] + #we can't use simple \b processing if we get ansi codes and aren't actually processing them (e.g moves) + + set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after ansistrip - some like BEL are part of ansi + #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter + #(* more correctly - moves cursor back) + #Note some terminals process backspace before \v - which seems quite wrong + #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already + #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line + # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. + #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS + + #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) + #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces + #normalize tabs to an appropriate* width + #*todo - handle terminal/context where tabwidth != the default 8 spaces + if {[tcl::string::last \t $line] >= 0} { + if {[tcl::info::exists punk::console::tabwidth]} { + set tw $::punk::console::tabwidth + } else { + set tw 8 + } + set line [textutil::tabify::untabify2 $line $tw] + } + + #NOTE - this is non-destructive backspace as it occurs in text blocks - and is likely different to the sequence coming from a terminal or editor which generally does a destructive backspace + #e.g + #This means for example that abc\b has a length of 3. Trailing or leading backslashes have no effect + + #set bs [format %c 0x08] + #set line [tcl::string::map [list "\r\b" "\r"] $line] ;#backsp following a \r will have no effect + set line [tcl::string::trim $line \b] ;#take off at start and tail only + + #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. + #(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) + set n 0 + + #set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. + set chars [punk::char::grapheme_split $line] + set cr_posns [lsearch -all $chars \r] + set bs_posns [lsearch -all $chars \b] + foreach p $cr_posns { + lset chars $p + } + foreach p $bs_posns { + lset chars $p + } + + #mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner + #build an output + set idx 0 + set outchars [list] + set outsizes [list] + # -- + #tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code + #this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above + #we could reasonably do it with backspace - but cr is more difficult + #note that \x08 \b etc won't work to create a compiled switch statement even with unbraced (separate argument) form of switch statement. + #set bs "" + #set cr ? + # -- + foreach c $chars { + switch -- $c { + { + if {$idx > 0} { + incr idx -1 + } + } + { + set idx 0 + } + default { + #set nxt [llength $outchars] + if {$idx < [llength $outchars]} { + #overstrike? - should usually have no impact on width - width taken as last grapheme in that column + #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done + #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. + lset outchars $idx $c + } else { + lappend outchars $c + } + #punk::ansi::internal::printing_length_addchar $idx $c + incr idx + } + } + } + return [punk::char::ansifreestring_width [join $outchars ""]] + } + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #with thanks to Helmut Giese and other Authors of tcllib textutil + #this version is adjusted to handle ANSI SGR strings + #TODO! ANSI aware version + + proc untabifyLine { line num } { + variable Spaces + + set currPos 0 + while { 1 } { + set currPos [tcl::string::first \t $line $currPos] + if { $currPos == -1 } { + # no more tabs + break + } + + # how far is the next tab position ? + set dist [expr {$num - ($currPos % $num)}] + # replace '\t' at $currPos with $dist spaces + set line [tcl::string::replace $line $currPos $currPos $Spaces($dist)] + + # set up for next round (not absolutely necessary but maybe a trifle + # more efficient) + incr currPos $dist + } + return $line + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + } + + #ever so slightly slower on short strings - much faster than split_at_codes version for large/complex ansi blocks + proc ansistrip {text} { + #*** !doctools + #[call [fun ansistrip] [arg text] ] + #[para]Return a string with ansi codes stripped out + #[para]Alternate graphics chars are replaced with modern unicode equivalents (e.g boxdrawing glyphs) + + if {[punk::ansi::ta::detect_g0 $text]} { + set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters + } + set parts [punk::ansi::ta::split_codes $text] + set out "" + foreach {pt code} $parts { + append out $pt + } + return $out + } + #interp alias {} stripansi {} ::punk::ansi::ansistrip + proc ansistripraw {text} { + #*** !doctools + #[call [fun ansistripraw] [arg text] ] + #[para]Return a string with ansi codes stripped out + #[para]Alternate graphics modes will be stripped rather than converted to unicode - exposing the raw ascii characters as they appear without graphics mode. + #[para]ie instead of a horizontal line you may see: qqqqqq + + set parts [punk::ansi::ta::split_codes $text] + set out "" + foreach {pt code} $parts { + append out $pt + } + return $out + } + #interp alias {} stripansiraw {} ::punk::ansi::ansistripraw + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::ansi ---}] +} + +tcl::namespace::eval punk::ansi { + + + # -- --- --- --- --- --- + #XTGETTCAP + # xterm responds with + # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and + # the value of the corresponding string that xterm would send, + # or + # DCS 0 + r ST for invalid requests. + # The strings are encoded in hexadecimal (2 digits per + # character). If more than one name is given, xterm replies + # with each name/value pair in the same response. An invalid + # name (one not found in xterm's tables) ends processing of the + # list of names. + proc xtgetcap {keylist} { + #ESC P = 0x90 = DCS = Device Control String + set hexkeys [list] + foreach k $keylist { + lappend hexkeys [util::str2hex $k] + } + set payload [join $hexkeys ";"] + return "\x1bP+q$payload\x1b\\" + } + proc xtgetcap2 {keylist} { + #ESC P = 0x90 = DCS = Device Control String + set hexkeys [list] + foreach k $keylist { + lappend hexkeys [util::str2hex $k] + } + set payload [join $hexkeys ";"] + return "\u0090+q$payload\u009c" + } + tcl::namespace::eval codetype { + #*** !doctools + #[subsection {Namespace punk::ansi::codetype}] + #[para] API functions for punk::ansi::codetype + #[para] Utility functions for processing ansi code sequences + #[list_begin definitions] + + #Functions that are primarily intended to operate on a single ansi code sequence - rather than a sequence, or codes embedded in another string + #in some cases multiple sequences or leading trailing strings are ok - but the proc docs should note where the function is looking + #review - separate namespace for functions that operate on multiple or embedded? + + proc is_sgr {code} { + #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline + #we will accept and pass through the less common colon separator (ITU Open Document Architecture) + #Terminals should generally ignore it if they don't use it + regexp {\033\[[0-9;:]*m$} $code + } + + #review - has_cursor_move_in_line? Are we wanting to allow strings/sequences and detect that there are no moves that *aren't* within line? + proc is_cursor_move_in_line {code {knownline ""}} { + if {[regexp {\033\[[0-9]*(:?C|D|G)$} $code]} { + return 1 + } + if {[tcl::string::is integer -strict $knownline]} { + #CSI n : m H where row n happens to be current line - review/test + set re [tcl::string::map [list %n% $knownline] {\x1b\[%n%:[0-9]*H$}] + if {[regexp $re $code]} { + return 1 + } + } + return 0 + } + #pure SGR reset with no other functions + proc is_sgr_reset {code} { + #*** !doctools + #[call [fun is_sgr_reset] [arg code]] + #[para]Return a boolean indicating whether this string has a trailing pure SGR reset + #[para]Note that if the reset is not the very last item in the string - it will not be detected. + #[para]This is primarily intended for testing a single ansi code sequence, but code can be any string where the trailing SGR code is to be tested. + + #todo 8-bit csi + regexp {\x1b\[0*m$} $code + } + + + #whether this code has 0 (or equivalently empty) parameter (but may set others) + #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes + #it generally only makes sense for the reset to be the first parameter - otherwise the code has ineffective portions + #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. + #We only look at the initial parameter within the trailing SGR code as this is the well-formed normal case. + + #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg colour twice in same code + proc has_sgr_leadingreset {code} { + #*** !doctools + #[call [fun has_sgr_leadingreset] [arg code]] + #[para]The reset must be the very first item in code to be detected. Trailing strings/codes ignored. + set params "" + #we need non-greedy + if {[regexp {^\033\[([^m]*)m} $code _match params]} { + #must match trailing m to be the type of reset we're looking for + set plist [split $params ";"] + if {[tcl::string::trim [lindex $plist 0] 0] eq ""} { + #e.g \033\[m \033\[0\;...m \033\[0000...m + return 1 + } else { + return 0 + } + } else { + return 0 + } + } + proc is_gx {code} { + #g0 {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + #g1 {(?:\x1b\)0)(?:(?!\x1b\)B).)*\x1b\)B} + regexp {\x1b(?:\(0(?:(?:(?!\x1b\(B).)*\x1b\(B)|\)0(?:(?:(?!\x1b\)B).)*\x1b\)B))} $code + } + proc is_gx_open {code} { + #todo g2,g3? + #pin to start and end with ^ and $ ? + #regexp {\x1b\(0|\x1b\)0} $code + regexp {\x1b(?:\(0|\)0)} $code + } + proc is_gx_close {code} { + #regexp {\x1b\(B|\x1b\)B} $code + regexp {\x1b(?:\(B|\)B)} $code + } + #input assumed to be single codes - simple test for 2nd char left bracket and trailing m is done anyway - codes not matching are ignored and passed through + #This is not order-preserving if non-sgr codes are present as they are tacked on to the end even if they initially were before all SGR codes + + variable codestate_empty + set codestate_empty [tcl::dict::create] + tcl::dict::set codestate_empty rst "" ;#0 (or empty) + tcl::dict::set codestate_empty intensity "" ;#1 bold, 2 dim, 22 normal + tcl::dict::set codestate_empty italic "" ;#3 on 23 off + tcl::dict::set codestate_empty underline "" ;#4 on 24 off + + #nonstandard/extended 4:0,4:1,4:2,4:3,4:4,4:5 + #4:1 single underline and 4:2 double underline deliberately kept separate to standard SGR versions + #The extended codes are merged separately allowing fallback SGR to be specified for terminals which don't support extended underlines + tcl::dict::set codestate_empty underextended "" ;#4:0 for no extended underline 4:1 etc for underline styles + #tcl::dict::set codestate_empty undersingle "" + #tcl::dict::set codestate_empty underdouble "" + #tcl::dict::set codestate_empty undercurly "" + #tcl::dict::set codestate_empty underdottedn "" + #tcl::dict::set codestate_empty underdashed "" + + tcl::dict::set codestate_empty blink "" ;#5 or 6 for slow/fast, 25 for off + tcl::dict::set codestate_empty reverse "" ;#7 on 27 off + tcl::dict::set codestate_empty hide "" ;#8 on 28 off + tcl::dict::set codestate_empty strike "" ;#9 on 29 off + tcl::dict::set codestate_empty font "" ;#10, 11-19 10 being primary + tcl::dict::set codestate_empty gothic "" ;#20 + tcl::dict::set codestate_empty doubleunderline "" ;#21 (standard SGR double as opposed to underdouble) + tcl::dict::set codestate_empty proportional "" ;#26 - see note below + tcl::dict::set codestate_empty frame_or_circle "" ;#51,52 on - 54 off (54 off) (not generally used - mintty has repurposed for emoji variation selector) + + #ideogram rarely supported - this implementation untested - each attribute kept separate as they presumably can be applied simultaneously + tcl::dict::set codestate_empty ideogram_underline "" + tcl::dict::set codestate_empty ideogram_doubleunderline "" + tcl::dict::set codestate_empty ideogram_overline "" + tcl::dict::set codestate_empty ideogram_doubleoverline "" + tcl::dict::set codestate_empty ideogram_clear "" + + tcl::dict::set codestate_empty overline "" ;#53 on 55 off - probably not supported - pass through. Seem to be ok to merge with other SGR even if not supported. + tcl::dict::set codestate_empty underlinecolour "" ;#58 - same arguments as 256colour and rgb (nonstandard - in Kitty ,VTE,mintty and iTerm2) + + # -- mintty? + tcl::dict::set codestate_empty superscript "" ;#73 + tcl::dict::set codestate_empty subscript "" ;#74 + tcl::dict::set codestate_empty nosupersub "" ;#75 + # -- + + tcl::dict::set codestate_empty fg "" ;#30-37 + 90-97 + tcl::dict::set codestate_empty bg "" ;#40-47 + 100-107 + + + #misnomer should have been sgr_merge_args ? :/ + #as a common case optimisation - it will not merge a single element list, even if that code contains redundant elements + proc sgr_merge_list {args} { + if {[llength $args] == 0} { + return "" + } elseif {[llength $args] == 1} { + return [lindex $args 0] + } + sgr_merge $args + } + proc sgr_merge {codelist args} { + set allparts [list] + foreach c $codelist { + set cparts [punk::ansi::ta::split_codes_single $c] + lappend allparts {*}[lsearch -all -inline -not $cparts ""] + } + sgr_merge_singles $allparts {*}$args + } + + variable defaultopts_sgr_merge_singles + set defaultopts_sgr_merge_singles [tcl::dict::create\ + -filter_fg 0\ + -filter_bg 0\ + -filter_reset 0\ + ] + + #codes *must* already have been split so that one esc per element in codelist + #e.g codelist [a+ Yellow Red underline] [a+ blue] [a+ red] is ok + #but codelist "[a+ Yellow Red underline][a+ blue]" [a+ red] is not + #(use punk::ansi::ta::split_codes_single) + proc sgr_merge_singles {codelist args} { + variable codestate_empty + variable defaultopts_sgr_merge_singles + set opts $defaultopts_sgr_merge_singles + foreach {k v} $args { + switch -- $k { + -filter_fg - -filter_bg - -filter_reset { + tcl::dict::set opts $k $v + } + default { + error "sgr_merge unknown option '$k'. Known options [tcl::dict::keys $opts]" + } + } + } + + set othercodes [list] + set codestate $codestate_empty + set codestate_initial $codestate_empty ;#keep a copy for resets. + set did_reset 0 + + #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? + #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? + #we will output 7bit merge of the SGRs even if some or all were 8bit CSi + #As at 2024 - 7bit are widely supported 8bit seem to be often ignored by pseudoterminals + #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. + #review - consider a higher-level option for always emitting 8bit or always 7bit + #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. + + #We still output any non SGR codes in the list as they came in - preserving their CSI + + foreach c $codelist { + #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes + #.. but preserve original c + #set cnorm [tcl::string::map [list \x9b {8[} ] $c] + #switch -- [tcl::string::index $cnorm 1][string index $cnorm end] {} + # {[m} + + set cnorm [tcl::string::map [list \x9b 8CSI "\x1b\[" 7CSI ] $c] + switch -- [tcl::string::range $cnorm 0 3][tcl::string::index $cnorm end] { + 7CSIm - 8CSIm { + #set params [tcl::string::range $cnorm 2 end-1] ;#strip leading esc lb and trailing m + set params [tcl::string::range $cnorm 4 end-1] ;#string leading XCSI and trailing m + + #some systems use colon for 256 colours or RGB or nonstandard subparameters + #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. + # - will break mintty? set params [tcl::string::map [list : {;}] $params] + set plist [split $params {;}] + if {![llength $plist]} { + #if there was nothing - it must be a reset - we need it in the list + lappend plist "" + } + #we shouldn't get an empty or zero param beyond index 0 - but it's possible + #some codes have additional parameters - e.g rgb colours so we need to jump forward in the parameter list sometimes. + for {set i 0} {$i < [llength $plist]} {incr i} { + set p [lindex $plist $i] + set paramsplit [split $p :] + #for some cases we passthrough $p instead of just the number - in case another implementation uses the colon subparameters + #e.g see https://github.com/mintty/mintty/wiki/Tips#text-attributes-and-rendering + #this may have originated with kitty? + #windows terminal seems to be implementing it too + #however, they can be completely repurposed - so we probably need to specifically support them.. REVIEW. + + #review - what about \x1b\[000m + #we need to accept/ignore leading zeros - we can't just pass to expr - as some tcl versions still see leading zero as octal + set codeint [tcl::string::trimleft [lindex $paramsplit 0] 0] + switch -- $codeint { + "" - 0 { + if {![tcl::dict::get $opts -filter_reset]} { + set codestate $codestate_initial + set did_reset 1 + } + } + 1 { + #bold + if {[llength $paramsplit] == 1} { + tcl::dict::set codestate intensity $p + } else { + if {[lindex $paramsplit 1] eq "2"} { + tcl::dict::set codestate shadowed "1:2" ;#turn off also with 22 + } + } + } + 2 { + #dim + tcl::dict::set codestate intensity 2 + } + 3 { + tcl::dict::set codestate italic 3 + } + 4 { + #REVIEW - merging extended (e.g 4:4) underline attributes suppresses all other SGR attributes on at least some terminals which don't support extended underlines + if {[llength $paramsplit] == 1} { + tcl::dict::set codestate underline 4 + } else { + switch -- [lindex $paramsplit 1] { + 0 { + #no *extended* underline + #tcl::dict::set codestate underline 24 + tcl::dict::set codestate underextended 4:0 ;#will not turn off SGR standard underline if term doesn't support extended + } + 1 { + tcl::dict::set codestate underextended 4:1 + } + 2 { + tcl::dict::set codestate underextended 4:2 + } + 3 { + tcl::dict::set codestate underextended "4:3" + } + 4 { + tcl::dict::set codestate underextended "4:4" + } + 5 { + tcl::dict::set codestate underextended "4:5" + } + } + + } + } + 5 - 6 { + tcl::dict::set codestate blink $p + } + 7 { + tcl::dict::set codestate reverse 7 + } + 8 { + tcl::dict::set codestate hide 8 + } + 9 { + tcl::dict::set codestate strike 9 + } + 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 { + tcl::dict::set codestate font $p + } + 20 { + tcl::dict::set codestate gothic 20 + } + 21 { + #ECMA-48 double underline - some terminals use as not-bold. For now we won't support that. + tcl::dict::set codestate doubleunderline 21 + } + 22 { + #normal intensity + tcl::dict::set codestate intensity 22 + tcl::dict::set codestate shadowed "" + } + 23 { + #? wikipedia mentions blackletter - review + tcl::dict::set codestate italic 23 + } + 24 { + tcl::dict::set codestate underline 24 ;#off + tcl::dict::set codestate underextended "4:0" ;#review + } + 25 { + tcl::dict::set codestate blink 25 ;#off + } + 26 { + #not known to be used in terminals.. could it be used with elastic tabstops? - review + tcl::dict::set codestate proportional 26 + } + 27 { + tcl::dict::set codestate reverse 27 ;#off + } + 28 { + tcl::dict::set codestate hide 28 ;#reveal + } + 29 { + tcl::dict::set codestate strike 29;#off + } + 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 { + tcl::dict::set codestate fg $p ;#foreground colour + } + 38 { + #256 colour or rgb + #check if subparams supplied as colon separated + if {[tcl::string::first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + tcl::dict::set codestate fg "38\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + tcl::dict::set codestate fg "38\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + #apparently subparameters can be left empty - and there are other subparams like transparency and colour-space + #we should only need to pass it all through for the terminal to understand + #review + tcl::dict::set codestate fg $p + } + } + 39 { + tcl::dict::set codestate fg 39 ;#default foreground + } + 40 - 41 - 42 - 43 - 44 - 45 - 46 - 47 { + tcl::dict::set codestate bg $p ;#background colour + } + 48 { + #256 colour or rgb + if {[tcl::string::first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + tcl::dict::set codestate bg "48\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + tcl::dict::set codestate bg "48\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + tcl::dict::set codestate bg $p + } + } + 49 { + tcl::dict::set codestate bg 49 ;#default background + } + 50 { + tcl::dict::set codestate proportional 50 ;#off - see 26 + } + 51 - 52 { + tcl::dict::set codestate frame_or_circle 51 + } + 53 { + tcl::dict::set codestate overline 53 ;#not supported in terminals? pass through anyway + } + 54 { + tcl::dict::set codestate frame_or_circle 54 ;#off + } + 55 { + tcl::dict::set codestate overline 55; #off + } + 58 { + #nonstandard + #256 colour or rgb + if {[tcl::string::first : $p] < 0} { + switch -- [lindex $plist $i+1] { + 5 { + #256 - 1 more param + tcl::dict::set codestate underlinecolour "58\;5\;[lindex $plist $i+2]" + incr i 2 + } + 2 { + #rgb + tcl::dict::set codestate underlinecolour "58\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" + incr i 4 + } + } + } else { + tcl::dict::set codestate underlinecolour $p + } + } + 59 { + #nonstandard - default underlinecolour + tcl::dict::set codestate underlinecolour 59 + } + 60 { + tcl::dict::set codestate ideogram_underline 60 + tcl::dict::set codestate ideogram_clear "" + } + 61 { + tcl::dict::set codestate ideogram_doubleunderline 61 + tcl::dict::set codestate ideogram_clear "" + } + 62 { + tcl::dict::set codestate ideogram_overline 62 + tcl::dict::set codestate ideogram_clear "" + } + 63 { + tcl::dict::set codestate ideogram_doubleoverline 63 + tcl::dict::set codestate ideogram_clear "" + } + 64 { + tcl::dict::set codestate ideogram_stress 64 + tcl::dict::set codestate ideogram_clear "" + } + 65 { + tcl::dict::set codestate ideogram_clear 65 + #review - we still need to pass through the ideogram_clear in case something understands it + tcl::dict::set codestate ideogram_underline "" + tcl::dict::set codestate ideogram_doubleunderline "" + tcl::dict::set codestate ideogram_overline "" + tcl::dict::set codestate ideogram_doubleoverline "" + } + 73 { + #mintty only? + #can be combined with subscript + tcl::dict::set codestate superscript 73 + tcl::dict::set codestate nosupersub "" + } + 74 { + tcl::dict::set codestate subscript 74 + tcl::dict::set codestate nosupersub "" + } + 75 { + tcl::dict::set codestate nosupersub 75 + tcl::dict::set codestate superscript "" + tcl::dict::set codestate subcript "" + } + 90 - 91 - 92 - 93 - 94 - 95 - 96 - 97 { + tcl::dict::set codestate fg $p + } + 100 - 101 - 102 - 103 - 104 - 105 - 106 - 107 { + tcl::dict::set codestate bg $p + } + + } + } + } + default { + lappend othercodes $c + } + } + + } + + set codemerge "" + set unmergeable "" ;# can merge with each other but not main set (for terminals not supporting extended codes) + if {[tcl::dict::get $opts -filter_fg] || [tcl::dict::get $opts -filter_bg]} { + #safe jumptable test + #dict for {k v} $codestate {} + tcl::dict::for {k v} $codestate { + switch -- $v { + "" { + } + default { + switch -- $k { + bg { + if {![tcl::dict::get $opts -filter_bg]} { + append codemerge "${v}\;" + } + } + fg { + if {![tcl::dict::get $opts -filter_fg]} { + append codemerge "${v}\;" + } + } + underlinecolour - underextended { + append unmergeable "${v}\;" + } + default { + append codemerge "${v}\;" + } + } + } + } + } + } else { + #safe jumptable test + #dict for {k v} $codestate {} + tcl::dict::for {k v} $codestate { + switch -- $v { + "" {} + default { + switch -- $k { + underlinecolour - underextended { + append unmergeable "${v}\;" + } + default { + append codemerge "${v}\;" + } + } + } + } + } + } + if {$did_reset} { + #review - unmergeable + set codemerge "0\;$codemerge" + if {$codemerge eq ""} { + set unmergeable "0\;$unmergeable" + } + } + #puts "+==> codelist:[ansistring VIEW $codelist] did_reset:$did_reset codemerge:[ansistring VIEW $codemerge] unmergeable:[ansistring VIEW $unmergeable]" + if {$codemerge ne ""} { + set codemerge [tcl::string::trimright $codemerge {;}] + if {$unmergeable ne ""} { + set unmergeable [tcl::string::trimright $unmergeable {;}] + return "\x1b\[${codemerge}m\x1b\[${unmergeable}m[join $othercodes ""]" + } else { + return "\x1b\[${codemerge}m[join $othercodes ""]" + } + } else { + if {$unmergeable eq ""} { + #there were no SGR codes - not even resets + return [join $othercodes ""] + } else { + set unmergeable [tcl::string::trimright $unmergeable {;}] + return "\x1b\[${unmergeable}m[join $othercodes ""]" + } + } + } + + #has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}] + } + tcl::namespace::eval sequence_type { + proc is_Fe {code} { + # C1 control codes + if {[regexp {^\033\[[\u0040-\u005F]}]} { + #7bit - typical case + return 1 + } + #8bit + #review - all C1 escapes ? 0x80-0x90F + #This is possibly problematic as it is affected by encoding. + #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit + #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." + return 0 + } + proc is_Fs {code} { + puts stderr "is_Fs unimplemented" + } + } + # -- --- --- --- --- --- --- --- --- --- --- + #todo - implement colour resets like the perl module: + #https://metacpan.org/pod/Text::ANSI::Util + #(saves up all ansi colour codes since previous colour reset and replays the saved codes after our highlighting is done) +} + + +tcl::namespace::eval punk::ansi::ta { + #*** !doctools + #[subsection {Namespace punk::ansi::ta}] + #[para] text ansi functions + #[para] based on but not identical to the Perl Text Ansi module: + #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm + #[list_begin definitions] + tcl::namespace::path ::punk::ansi + + #handle both 7-bit and 8-bit csi + #review - does codepage affect this? e.g ebcdic has 8bit csi in different position + + #CSI + #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m + variable re_csi_open {(?:\x1b\[|\u009b)} + #variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@\^_\{|\}\[\]~`]} + variable re_csi_code {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]} + + #intermediate bytes range 0x20-0x2F (ascii space and !"#$%&'()*+,-./) + #parameter bytes range 0x30-0x3F (ascii 0-9:;<=>?) + #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). + + #colour and style + variable re_sgr {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m + + #OSC - termnate with BEL (\a \007) or ST (string terminator \x1b\\) + # 8-byte string terminator is \x9c (\u009c) + + #non-greedy by excluding ST terminators + variable re_esc_osc1 {(?:\x1b\])(?:[^\007]*)\007} + #variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} ;#somewhat wrong - we want to exclude the ST - not other esc sequences + variable re_esc_osc2 {(?:\x1b\])(?:(?!\x1b\\).)*\x1b\\} + variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} + variable re_osc_open {(?:\x1b\]|\u009d).*} + + + variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] + variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} + + #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess + variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + variable re_g0_open {(?:\x1b\(0)} + variable re_g0_close {(?:\x1b\(B)} + + # DCS "ESC P" or "0x90" is also terminated by ST + set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + #ST terminators [list \007 \033\\ \u009c] + + #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) + #non-greedy by exclusion of ST terminators in body + #we need to match \x1b\\ not just \x1b There could be colour codes nested in a privacy msg/string + #even if terminals generally don't support that - it's quite possible for an ansi code to get nested this way - and we'd prefer it not to break our splits + #Just checking for \x1b will terminate the match too early + #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) + #variable re_ST {(?:\x1bX|\u0098|\x1b\^|\u009E|\x1b_|\u009F)(?:[^\x1b\007\u009c]*)(?:\x1b\\|\007|\u009c)} ;#downsides: early terminating with nests, mixes 7bit 8bit start/ends (does that exist in the wild?) + #keep our 8bit/7bit start-end codes separate + variable re_ST {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)} + + + + #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open + + #default for regexes is non-newline-sensitive matching - ie matches can span lines + # -- --- --- --- + variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}" + # -- --- --- --- + #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regext TRIE generator that works with Tcl regexes + #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. + variable re_ansi_detect {(?:\x1b(?:\((?:0|B)|\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|007).)*(?:\x1b\\|\007))|c|7|8|M|E|D|H|=|>|(?:#(?:3|4|5|6|8))))|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)|(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]|(?:\u009d)(?:[^\u009c]*)?\u009c} + # -- --- --- --- + + + + variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_ST_open}|${re_g0_open}" + + #may be same as detect - kept in case detect needs to diverge + #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}" + set re_ansi_split $re_ansi_detect + + #detect any ansi escapes + #review - only detect 'complete' codes - or just use the opening escapes for performance? + proc detect {text} [string map [list [list $re_ansi_detect]] { + #*** !doctools + #[call [fun detect] [arg text]] + #[para]Return a boolean indicating whether Ansi codes were detected in text + #[para]Important caveat: + #[para] When text is a tcl list made from splitting (or lappending) some ansi string - individual elements may be braced or have certain chars escaped. (one example is if a list element contains an unbalanced brace) + #[para] This can cause square brackets that form part of the ansi being backslash escaped - and the regexp can fail to match + regexp $text + }] + + #can be used on dicts - but will check keys too. keys could also contain ansi and have escapes + proc detect_in_list {list} { + detect [join $list " "] + } + proc detect_in_list2 {list} { + foreach item $list { + if {[detect $item]} { + return 1 + } + } + return 0 + } + proc detect_g0 {text} [string map [list [list $re_g0_group]] { + regexp $text + }] + #note - micro optimisation of inlining gives us *almost* nothing extra. + #left in place for a few such as detect/detect_g0 as we want them as fast as possible + # in general the technique doesn't seem particularly worthwhile for this set of functions. + #the performance is dominated by the complexity of the regexp + proc detect2 {text} { + variable re_ansi_detect + expr {[regexp $re_ansi_detect $text]} + } + + proc detect_open {text} { + variable re_ansi_detect_open + expr {[regexp $re_ansi_detect_open $text]} + } + + #not in perl ta + proc detect_csi {text} { + #*** !doctools + #[call [fun detect_csi] [arg text]] + #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text + #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] + #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation + #[para]There is also a multi-byte escape sequence \u009b + #[para]This is less commonly used but is also detected here + #[para](This function is not in perl ta) + variable re_csi_open + expr {[regexp $re_csi_open $text]} + } + proc detect_sgr {text} { + #*** !doctools + #[call [fun detect_sgr] [arg text]] + #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. + #[para]This is the set of CSI sequences ending in 'm' + #[para]This is most commonly an Ansi colour code - but also things such as underline and italics + #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. + #[para](This function is not in perl ta) + variable re_sgr + expr {[regexp $re_sgr $text]} + } + proc strip {text} { + #*** !doctools + #[call [fun strip] [arg text]] + #[para]Return text stripped of Ansi codes + #[para]This is a tailcall to punk::ansi::ansistrip + tailcall ansistrip $text + } + proc length {text} { + #*** !doctools + #[call [fun length] [arg text]] + #[para]Return the character length after stripping ansi codes - not the printing length + + #we can use ansistripraw to avoid g0 conversion - as the length should remain the same + tcl::string::length [ansistripraw $text] + } + #todo - handle newlines + #not in perl ta + #proc printing_length {text} { + # + #} + + proc trunc {text width args} { + + } + + #not in perl ta + #returns just the plaintext portions in a list + proc split_at_codes {str} [string map [list $re_ansi_split] { + #variable re_ansi_split + #punk::ansi::internal::splitx $str ${re_ansi_split} + punk::ansi::ta::Do_split_at_codes $str {} + }] + #it is faster to split this function out than to inline it into split_at_codes in tcl 8.7 - something to do with the use of the variable vs argument for the regexp + #literal inlining of the re in the main proc-body was slower too - but inlining it into the wrapper seems to work (a tiny bit) + #the difference is not often apparent when comparing timerate results from split_at_codes vs split_at_codes2 - + # - but in aggregate for something like textblock::periodic - we can get a bit over 5% faster (e.g 136ms vs 149ms) + proc Do_split_at_codes {str regexp} { + if {$str eq ""} { + return {} + } + #no infinite loop check on regexp like splitx does because we should have tested re_ansi_split during development + set list {} + set start 0 + while {[regexp -start $start -indices -- $regexp $str match submatch]} { + lassign $submatch subStart subEnd + lassign $match matchStart matchEnd + incr matchStart -1 + incr matchEnd + lappend list [tcl::string::range $str $start $matchStart] + if {$subStart >= $start} { + lappend list [tcl::string::range $str $subStart $subEnd] + } + set start $matchEnd + } + lappend list [tcl::string::range $str $start end] + return $list + + } + proc Do_split_at_codes_join {str regexp} { + if {$str eq ""} { + return {} + } + #no infinite loop check on regexp like splitx does because we should have tested re_ansi_split during development + set list {} + set start 0 + while {[regexp -start $start -indices -- $regexp $str match submatch]} { + lassign $submatch subStart subEnd + lassign $match matchStart matchEnd + incr matchStart -1 + incr matchEnd + lappend list [tcl::string::range $str $start $matchStart] + if {$subStart >= $start} { + lappend list [tcl::string::range $str $subStart $subEnd] + } + set start $matchEnd + } + lappend list [tcl::string::range $str $start end] + return [join $list ""] + } + proc split_at_codes2 {str} [string map [list $re_ansi_split] { + #variable re_ansi_split + #punk::ansi::internal::splitx $str ${re_ansi_split} + + #set regexp $re_ansi_split + #set regexp {} + + #inline splitx to avoid regex checks + #from textutil::split::splitx + # Bugfix 476988 + if {$str eq ""} { + return {} + } + #if {[regexp $regexp {}]} { + # return -code error \ + # "splitting on regexp \"$re_ansi_split\" would cause infinite loop" + #} + #no infinite loop check on regexp like splitx does because we should have tested re_ansi_split during development + set list {} + set start 0 + while {[regexp -start $start -indices -- {} $str match submatch]} { + lassign $submatch subStart subEnd + lassign $match matchStart matchEnd + incr matchStart -1 + incr matchEnd + lappend list [tcl::string::range $str $start $matchStart] + if {$subStart >= $start} { + lappend list [tcl::string::range $str $subStart $subEnd] + } + set start $matchEnd + } + lappend list [tcl::string::range $str $start end] + return $list + }] + + # -- --- --- --- --- --- + #Split $text to a list containing alternating ANSI colour codes and text. + #ANSI colour codes are always on the second element, fourth, and so on. + #(ie plaintext on odd list-indices ansi on even indices) + # Example: + #ta_split_codes "" # => "" + #ta_split_codes "a" # => "a" + #ta_split_codes "a\e[31m" # => {"a" "\e[31m"} + #ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} + #ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} + #ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} + #ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} + # + proc split_codes {text} { + variable re_ansi_split + set re "(?:${re_ansi_split})+" + return [_perlish_split $re $text] + } + #micro optimisations on split_codes to avoid function calls and make re var local tend to yield very little benefit (sub uS diff on calls that commonly take 10s/100s of uSeconds) + + #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) + #- the slightly simpler regex than split_codes means that it will be slightly faster than keeping the codes grouped. + proc split_codes_single {text} { + variable re_ansi_split + return [_perlish_split $re_ansi_split $text] + } + + #review - tcl greedy expressions may match multiple in one element + proc _perlish_split {re text} { + if {$text eq ""} { + return {} + } + set list [list] + set start 0 + + #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW + while {[regexp -start $start -indices -- $re $text match]} { + lassign $match matchStart matchEnd + #puts "->start $start ->match $matchStart $matchEnd" + if {$matchEnd < $matchStart} { + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::index $text $matchStart] + incr start + if {$start >= [tcl::string::length $text]} { + break + } + continue + } + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd] + set start [expr {$matchEnd+1}] + #? + if {$start >= [tcl::string::length $text]} { + break + } + } + return [lappend list [tcl::string::range $text $start end]] + } + + #experiment for coroutine generator + proc _perlish_split_yield {re text} { + if {[tcl::string::length $text] == 0} { + yield {} + } + set list [list] + set start 0 + + #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW + while {[regexp -start $start -indices -- $re $text match]} { + lassign $match matchStart matchEnd + #puts "->start $start ->match $matchStart $matchEnd" + if {$matchEnd < $matchStart} { + yield [tcl::string::range $text $start $matchStart-1] + yield [tcl::string::index $text $matchStart] + incr start + if {$start >= [tcl::string::length $text]} { + break + } + continue + } + yield [tcl::string::range $text $start $matchStart-1] + yield [tcl::string::range $text $matchStart $matchEnd] + set start [expr {$matchEnd+1}] + #? + if {$start >= [tcl::string::length $text]} { + break + } + } + #return [lappend list [tcl::string::range $text $start end]] + yield [tcl::string::range $text $start end] + } + proc _perlish_split2 {re text} { + if {[tcl::string::length $text] == 0} { + return {} + } + set list [list] + set start 0 + #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW + while {[regexp -start $start -indices -- $re $text match]} { + lassign $match matchStart matchEnd + #puts "->start $start ->match $matchStart $matchEnd" + if {$matchEnd < $matchStart} { + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::index $text $matchStart] + incr start + } else { + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd] + set start [expr {$matchEnd+1}] + } + if {$start >= [tcl::string::length $text]} { + break + } + } + return [lappend list [tcl::string::range $text $start end]] + } + proc _ws_split {text} { + regexp -all -inline {(?:\S+)|(?:\s+)} $text + } + # -- --- --- --- --- --- + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] +} +# -- --- --- --- --- --- --- --- --- --- --- +tcl::namespace::eval punk::ansi::class { + #assertions specifically for punk::ansi::class namespace + if {![llength [tcl::info::commands ::punk::ansi::class::assert]]} { + tcl::namespace::import ::punk::assertion::assert + punk::assertion::active 1 + } + + tcl::namespace::eval renderer { + if {[llength [tcl::info::commands ::punk::ansi::class::renderer::base_renderer]]} { + #Can happen if package forget was used and we're reloading (a possibly different version) ? review + ::punk::ansi::class::renderer::base_renderer destroy ;#will automatically destroy other classes such as class_cp437 that use this as a superclass + } + oo::class create base_renderer { + variable o_width + variable o_wrap o_overflow o_appendlines o_looplimit + + variable o_cursor_column o_cursor_row + #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered + variable o_rendereditems + + variable o_from_ansistring o_to_ansistring + variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed + constructor {args} { + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports) + set nspath [tcl::namespace::path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + tcl::namespace::path $nspath + #-- -- + if {[llength $args] < 2} { + error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} + } + lassign [lrange $args end-1 end] from_ansistring to_ansistring + set opts [tcl::dict::create\ + -width \uFFEF\ + -wrap 1\ + -overflow 0\ + -appendlines 1\ + -looplimit 15000\ + -experimental {}\ + -cursor_column 1\ + -cursor_row 1\ + ] + puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" + set argsflags [lrange $args 0 end-2] + foreach {k v} $argsflags { + switch -- $k { + -width - -wrap - -overflow - -appendlines - -looplimit - -experimental { + tcl::dict::set opts $k $v + } + default { + #don't use [self class] - or we'll get the superclass + error "[info object class [self]] unknown option '$k'. Known options: [tcl::dict::keys $opts]" + } + } + } + set o_width [tcl::dict::get $opts -width] + set o_wrap [tcl::dict::get $opts -wrap] + set o_overflow [tcl::dict::get $opts -overflow] + set o_appendlines [tcl::dict::get $opts -appendlines] + set o_looplimit [tcl::dict::get $opts -looplimit] + set o_cursor_column [tcl::dict::get $opts -cursor_column] + set o_cursor_row [tcl::dict::get $opts -cursor_row] + + set o_from_ansistring $from_ansistring + set o_ns_from [info object namespace $o_from_ansistring] + set o_to_ansistring $to_ansistring + set o_ns_to [info object namespace $o_to_ansistring] + #set o_render_index -1 ;#zero based. -1 indicates nothing yet rendered. + set o_rendereditems [list] ;#graphemes + controls + individual ansi codes from input $o_from_ansistring + } + #temporary test method + method eval_in {script} { + eval $script + } + method cursor_column {{col ""}} { + if {$col eq ""} { + return $o_cursor_column + } + if {$col < 1} { + error "Minimum cursor_column is 1" + } + set o_cursor_column $col + } + method cursor_row {{row ""}} { + if {$row eq ""} { + return $o_cursor_row + } + if {$row < 1} { + error "Minimum cursor_row is 1" + } + set o_cursor_row $row + } + + #consider scroll area + #we need to render to something with a concept of viewport, offscreen above,below,left,right? + method rendernext {} { + upvar ${o_ns_from}::o_ansisplits from_ansisplits + upvar ${o_ns_from}::o_elements from_elements + upvar ${o_ns_from}::o_splitindex from_splitindex + + #if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' + if {![llength $from_ansisplits]} { + tcl::namespace::eval $o_ns_from {my MakeSplit} + } + + set eidx [llength $o_rendereditems] + + #compare what we've rendered so far to our source to confirm they're still in sync + if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} { + puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]" + puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]" + error "rendernext error - rendering state is out of sync. rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring" + } + if {$eidx == [llength $from_elements]} { + #nothing new available + return [tcl::dict::create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0] + } + + set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + #we need to render in pt code chunks - not each grapheme element individually + #translate from element index to ansisplits index + set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to + + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo type_rendered item + #we don't expect type to change should be all graphemes (type 'g') or a single code (type 'sgr','other' etc) + #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ? + #if so - we should report a list of the grapheme types that were rendered in a pt block + #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway + #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation. + + #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element(?) + + set newtext "" + set rendercount 0 + if {$type_rendered eq "g"} { + + set e_splitindex $process_splitindex + while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} { + append newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + + incr eidx + set e_splitindex [lindex $from_splitindex $eidx] + set elementinfo [lindex $from_elements $eidx] + lassign $elementinfo _type item + } + } else { + #while not g ? render however many ansi sequences are in a row? + set newtext $item + lappend o_rendereditems $elementinfo + incr rendercount + } + + set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] + set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}] + assert {$rendercount == $count_rendered} + + #todo - renderline equivalent that operates on already split data + + #we start with one inputchunk, but we get appends/inserts if the whole chunk isn't for a single line of output + set inputchunks [list $newtext] + if 0 { + while {[llength $inputchunks]} { + set overtext [lpop inputchunks 0] + if {![tcl::string::length $overtext]} { + continue + } + #set rinfo [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -width $o_width -overflow 0 -cursor_column $col -cursor_row $row $undertext $overtext] + } + } + + $o_to_ansistring append $newtext + + return [tcl::dict::create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] + } + + } + #name all with prefix class_ for rendertype detection + oo::class create class_cp437 { + superclass base_renderer + } + oo::class create class_editbuf { + superclass base_renderer + } + } + + if {[llength [tcl::info::commands ::punk::ansi::class::class_ansistring]]} { + ::punk::ansi::class::class_ansistring destroy + } + #As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string. + #oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics + oo::class create class_ansistring { + variable o_cksum_command o_string o_count + + #this is the main state we keep of the split apart string + #we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext + variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes + variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split + + + #State regarding output renderstring (if any) + variable o_renderout ;#another class_ansistring instance + variable o_renderer ;# punk::ansi::class::renderer::class_ instance + variable o_renderwidth + variable o_rendertype + + # -- per element lookups -- + # llengths should all be the same + # we maintain 4 lookups per entry rather than a single nested list + # it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing. + variable o_elements ;#elements contains entry for each grapheme/control + each ansi code + variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes. + variable o_gx0states ;#0|1 for alternate graphics gx0 + variable o_splitindex ;#entry for each element indicating the index of the split it belongs to. + # -- -- + + constructor {string} { + set o_string $string + + #-- make assert available -- + # By pointing it to the assert imported into ::punk::ansi::class + # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports) + set nspath [tcl::namespace::path] + if {"::punk::ansi::class" ni $nspath} { + lappend nspath ::punk::ansi::class + } + tcl::namespace::path $nspath + #-- -- + + #we choose not to generate an internal split-state for the initial string - which may potentially be large. + #there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it. + #The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary + + set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string) + + set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc. + set o_ptlist [list] + #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the contructor, or no methods have yet triggered the initial string to have it's internal state built. + + set o_elements [list] + set o_sgrstacks [list] + set o_gx0states [list] + set o_splitindex [list] + + set o_cksum_command [list sha1::sha1 -hex] + + + #empty if no render methods used + # -- + set o_renderer "" + set o_renderout "" ;#class_ansistring + # -- + + set o_renderwidth 80 + set o_rendertype cp437 + } + + #temporary test method + method eval_in {script} { + eval $script + } + method checksum {} { + if {[catch { + package require sha1 + } errM]} { + error "sha1 package unavailable" + } + return [{*}$o_cksum_command [encoding convertto utf-8 $o_string]] + } + #todo - allow setting checksum algorithm and/or command + + method show_state {{verbose 0}} { + #show some state info - without updating anything + #only use 'my' methods that don't update the state e.g has_ansi + set result "" + if {![llength $o_ansisplits]} { + append result "No internal splits. " + append result \n "has ansi : [my has_ansi]" + append result \n "Tcl string length raw string: [tcl::string::length $o_string]" + } else { + append result \n "has ansi : [my has_ansi]" + append result \n "ansisplit list len: [llength $o_ansisplits]" + append result \n "plaintext list len: [llength $o_ptlist]" + append result \n "cached count : $o_count" + append result \n "Tcl string length raw string : [tcl::string::length $o_string]" + append result \n "Tcl string length plaintext parts: [tcl::string::length [join $o_ptlist ""]]" + if {[llength $o_ansisplits] %2 == 0} { + append result \n -------------------------------------------------- + append result \n Warning - ansisplits appears to be invalid length + append result \n Use show_state 1 to view + append result \n -------------------------------------------------- + } + } + if {$o_renderer ne ""} { + append result \n " renderer obj: $o_renderer" + append result \n " renderer class: [info object class $o_renderer]" + } + if {$o_renderout ne ""} { + append result \n " render target ansistring: $o_renderout" + append result \n " render target has ansi : [$o_renderout has_ansi]" + append result \n " render target count : [$o_renderout count]" + } + if {$verbose} { + append result \n "ansisplits listing" + #we will use a foreach with a single var rather than foreach {pt code} - so that if something goes wrong it's clearer. + #(using foreach {pt code} on an odd element list will give a spurious empty code at the end) + set i 0 + foreach item $o_ansisplits { + if {$i % 2 == 0} { + set type "pt " + } else { + set type code + } + append result \n "$type: [ansistring VIEW $item]" + incr i + } + append result \n "Last element of ansisplits should be of type pt" + } + return $result + } + + #private method + method MakeSplit {} { + #The split with each code as it's own element is more generally useful. + set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; + set o_ptlist [list] + set codestack [list] + set gx0_state 0 ;#default off + set current_split_index 0 ;#incremented for each pt block, incremented for each code + if {$o_count eq ""} { + set o_count 0 + } + foreach {pt code} $o_ansisplits { + lappend o_ptlist $pt + foreach grapheme [punk::char::grapheme_split $pt] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + incr o_count + } + #after handling the pt block - incr the current_split_index + incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry + #we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end) + if {$code ne ""} { + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + + #maintenance warning - dup in append! + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list "\x1b\[m"] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + lappend o_elements [list sgr $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx0_state 1 + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx0_state 0 + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend o_elements [list other $code] + } + } + #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index + incr current_split_index + } + } + #assertion every grapheme and every individual code has been added to o_elements + #every element has an entry in o_sgrstacks + #every element has an entry in o_gx0states + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + } + method convert_altg {} { + #do we need a method to retrieve without converting in the object? + puts "unimplemented" + } + method strippedlength {} { + if {![llength $o_ansisplits]} {my MakeSplit} + + } + #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already + method stripped {} { + if {![llength $o_ansisplits]} {my MakeSplit} + return [join $o_ptlist ""] + } + + #does not affect object state + method DoCount {plaintext} { + #- combiners/diacritics just map them away here - but for consistency we need to combine unicode grapheme clusters too. + #todo - joiners 200d? zwnbsp + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + + #we want length to return number of glyphs + normal controls such as newline.. not screen width. Has to be consistent with index function + return [tcl::string::length [regsub -all $re_diacritics $plaintext ""]] + } + + #This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!! + method count {} { + if {$o_count eq ""} { + #only initial string present + if {$o_string eq ""} { + set o_count 0 + return 0 + } + my MakeSplit + #set o_count [my DoCount [join $o_ptlist ""]] + } + return $o_count + } + #this is the equivalent of Tcl string length on the ansistripped string + method length {} { + if {![llength $o_ansisplits]} { + if {[punk::ansi::ta::detect $o_string]} { + my MakeSplit + } else { + return [tcl::string::length $o_string] + } + } elseif {[llength $o_ansisplits] == 1} { + #single split always means no ansi + return [tcl::string::length $o_string] + } + return [tcl::string::length [join $o_ptlist ""]] + } + method length_raw {} { + return [tcl::string::length $o_string] + } + + #channels for stream in/out.. these are vaguely analogous to the input/output between a shell and a PTY Slave - but this is not intended to be a full pseudoterminal + #renderstream_to_render (private?) + # write end held by outer ansistring? read end by inner render ansistring ? + #renderstream_from_render (public?) + + method rendertypes {} { + set classes [tcl::info::commands ::punk::ansi::class::renderer::class_*] + #strip off class_ + set ctypes [lmap v $classes {tcl::string::range [tcl::namespace::tail $v] 6 end}] + } + method rendertype {{rtype ""}} { + if {$rtype eq ""} { + return $o_rendertype + } + set rtypes [my rendertypes] + if {$rtype ni $rtypes} { + error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" + } + if {$o_renderout eq ""} { + #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? + set o_renderout [punk::ansi::class::class_ansistring new ""] + } + if {$o_renderer ne ""} { + set oinfo [info object class $o_renderer] + set tail [tcl::namespace::tail $oinfo] + set currenttype [tcl::string::range $tail 6 end] + if {$rtype ne $currenttype} { + puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" + $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + } else { + return $currenttype + } + } else { + puts "creating first renderer" + set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] + } + } + #--- progressive rendering buffer - another ansistring object + method renderwidth {{rw ""}} { + #report or set the renderwidth - may invalidate existing render progress? restart? + if {$rw eq ""} { + return $o_renderwidth + } + if {$rw eq $o_renderwidth} { + return $o_renderwidth + } + #re-render if needed? + + + set o_renderwidth $rw + } + method render_state {} { + #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary + #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. + #A single number representing the count of graphemes and individual ANSI codes (from the input ansistring) rendered might work + } + method renderbuf {} { + #get the underlying renderobj - if any + return $o_renderout ;#also class_ansistring + } + method render {} { + #full render - return buffer ansistring + } + method rendernext {} { + #render next available pt/code chunk only - not to end of available input + if {$o_renderer eq ""} { + my rendertype $o_rendertype ;#review - proper way to initialise rendering + } + $o_renderer rendernext + } + method render_cursorstate {{row_x_col ""}} { + #report /set? cursor posn + if {$o_renderer eq ""} { + error "No renderer. Call render methods first" + } + return [tcl::dict::create row [$o_renderer cursor_row] column [$o_renderer cursor_column]] + } + #--- + + method get {} { + return $o_string + } + method has_ansi {} { + if {![llength $o_ansisplits]} { + #initial string - for large strings,it's faster to run detect than update the internal split-state. + return [punk::ansi::ta::detect $o_string] + } else { + #string will continue to have a single o_ansisplits element if only non-ansi appended + return [expr {[llength $o_ansisplits] != 1}] + } + } + #todo - has_ansi_movement ? + #If an arbirary ANSI string has movement/cursor restore - then the number of apparent rows in the input will potentially bear no relation to the number of lines of output. + #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows + #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column + + #analagous to Tcl string append + #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient + method append {args} { + set catstr [join $args ""] + if {$catstr eq ""} { + return $o_string + } + + if {![punk::ansi::ta::detect $catstr]} { + #ansi-free additions + #if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state + if {![llength $o_ansisplits]} { + #initialise o_count because we need to add to it. + #The count method will do this by calling Makesplit only if it needs to. (which will create ansisplits for anything except empty string) + my count + } + append o_string $catstr;# only append after updating using my count above + if {![llength $o_ptlist]} { + #If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits + #even though we can use lset to add to a list - we can't for empty + lappend o_ptlist $catstr + #assertion - if o_ptlist is empty so is o_ansisplits + lappend o_ansisplits $catstr + } else { + lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] $catstr] + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] $catstr] + } + set last_codestack [lindex $o_sgrstacks end] + set last_gx0state [lindex $o_gx0states end] + set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex + foreach grapheme [punk::char::grapheme_split $catstr] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $last_codestack + lappend o_gx0states $last_gx0state + lappend o_splitindex $current_split_index + incr o_count + } + #incr o_count [my DoCount $catstr] ;#from before we were doing grapheme split.. review + } else { + if {![llength $o_ansisplits]} { + #if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append + append o_string $catstr ;#append before split and count on whole lot + my MakeSplit ;#update o_count + #set combined_plaintext [join $o_ptlist ""] + #set o_count [my DoCount $combined_plaintext] + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + return $o_string + } else { + #update each element of internal state incrementally without reprocessing what is already there. + append o_string $catstr + set newsplits [punk::ansi::ta::split_codes_single $catstr] + set ptnew "" + set codestack [lindex $o_sgrstacks end] + set gx0_state [lindex $o_gx0states end] + set current_split_index [lindex $o_splitindex end] + #first pt must be merged with last element of o_ptlist + set new_pt_list [list] + foreach {pt code} $newsplits { + lappend new_pt_list $pt + append ptnew $pt + foreach grapheme [punk::char::grapheme_split $pt] { + lappend o_elements [list g $grapheme] + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + incr o_count + } + incr current_split_index ;#increment 1 of 2 within each loop + if {$code ne ""} { + lappend o_sgrstacks $codestack + lappend o_gx0states $gx0_state + lappend o_splitindex $current_split_index + #maintenance - dup in MakeSplit! + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list "\x1b\[m"] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + lappend o_elements [list sgr $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first - remove straight dupes + set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + lappend o_elements [list sgr $code] + } else { + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx0_state 1 + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx0_state 0 + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + } else { + lappend o_elements [list other $code] + } + } + incr current_split_index ;#increment 2 of 2 + } + } + lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_pt_list 0]] + lappend o_ptlist {*}[lrange $new_pt_list 1 end] + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $newsplits 0]] + lappend o_ansisplits {*}[lrange $newsplits 1 end] + + #if {$o_count eq ""} { + # #we have splits - but didn't count graphemes? + # set o_count [my DoCount [join $o_ptlist ""]] ;#o_ptlist already has ptnew parts + #} else { + # incr o_count [my DoCount $ptnew] + #} + + } + } + assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} + return $o_string + } + + #we are currently assuming that the component strings have complete graphemes ie no split clusters - and therefore we don't attempt to check for and combine at the string catenation points. + #This is 'often'? likely to be true - We don't have grapheme cluster support yet anyway. review. + method appendobj {args} { + if {![llength $o_ansisplits]} { + my MakeSplit + } + foreach a $args { + set ns [info object namespace $a] + upvar ${ns}::o_ansisplits new_ansisplits + upvar ${ns}::o_count new_count + if {![llength $new_ansisplits] || $new_count eq ""} { + tcl::namespace::eval $ns {my MakeSplit} + } + upvar ${ns}::o_ptlist new_ptlist + upvar ${ns}::o_string new_string + upvar ${ns}::o_elements new_elements + upvar ${ns}::o_sgrstacks new_sgrstacks + upvar ${ns}::o_gx0states new_gx0states + upvar ${ns}::o_splitindex new_splitindex + + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] + lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_ptlist 0]] + lappend o_ptlist {*}[lrange $new_ptlist 1 end] + + append o_string $new_string + lappend o_elements {*}$new_elements + + #prepend the previous sgr stack to all stacks in the new list. + #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. + #ie just call sgr_merge_list once now. + set laststack [lindex $o_sgrstacks end] + set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + foreach n $new_sgrstacks { + lappend o_sgrstacks [list $mergedtail {*}$n] + } + + + lappend o_gx0states {*}$new_gx0states + + #first and last of ansisplits splits merge + set lastidx [lindex $o_splitindex end] + set firstnewidx [lindex $new_splitindex 0] + set diffidx [expr {$lastidx - $firstnewidx}] ;#may be negative + foreach v $new_splitindex { + lappend o_splitindex [expr {$v + $diffidx}] + } + + incr o_count $new_count + } + return $o_count + } + + + #method append_and_render - append and render up to end of appended data at same time + + method view {args} { + if {$o_string eq ""} { + return "" + } + #ansistring VIEW relies only on the raw ansi input as it is essentially just a tcl::string::map. + #We don't need to force an ansisplit if we happen to have an unsplit initial string + ansistring VIEW $o_string + } + method viewcodes {args} { + if {$o_string eq ""} { + return "" + } + if {![llength $o_ansisplits]} {my MakeSplit} + + set redb [a+ red bold] ;#osc/apm ? anything with potential security risks or that is unusual + set greenb [a+ green bold] ;#SGR + set cyanb [a+ cyan bold] ;#col,row movement + set blueb [a+ blue bold] ;# + set blueb_r [a+ blue bold reverse] + set whiteb [a+ white bold] ;#SGR reset (or highlight first part if leading reset) + set GX [a+ black White bold] ;#alt graphics + set unk [a+ yellow bold] ;#unknown/unhandled + set RST [a] + + set re_col_move {\x1b\[([0-9]*)(C|D|G)$} + set re_row_move {\x1b\[([0-9]*)(A|B)$} + set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} + set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} + set re_cursor_save {\x1b\[s$} + set re_cursor_restore {\x1b\[u$} + set re_cursor_save_dec {\x1b7$} + set re_cursor_restore_dec {\x1b8$} + + set arrow_left \u2190 + set arrow_right \u2192 + set arrow_up \u2191 + set arrow_down \u2193 + set arrow_lr \u2194 + set arrow_du \u2195 + #2024 - there is no 4-arrow symbol or variations (common cursor and window icon) in unicode - despite requests and argument from the community that this has been in use for decades. + #They are probably too busy with stupid emoji additions to add this or c1 visualization glyphs. + + #don't split into lines first - \n is valid within ST sections + set output "" + #set splits [punk::ansi::ta::split_codes_single $string] + + foreach {pt code} $o_ansisplits { + append output [ansistring VIEW {*}$args $pt] + + #map DEC cursor_save/restore to CSI version + set code [tcl::string::map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $code] + + + set c1 [tcl::string::index $code 0] + set c1c2 [tcl::string::range $code 0 1] + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + set leadernorm [tcl::string::range [tcl::string::map [list\ + \x1b\[ 7CSI\ + \x9b 8CSI\ + \x1b\] 7OSC\ + \x1b\( 7GFX\ + \x9d 8OSC\ + \x1b 7ESC\ + ] $c1c2] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars + + #we leave the tail of the code unmapped for now + switch -- $leadernorm { + 7CSI - 7OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 2 end]] + } + 7ESC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + 8CSI - 8OSC { + set codenorm [tcl::string::cat $leadernorm [tcl::string::range $code 1 end]] + } + default { + #we haven't made a mapping for this + set codenorm $code + } + } + + switch -- $leadernorm { + {7CSI} - {8CSI} { + set param [tcl::string::range $codenorm 4 end-1] + #puts stdout "--> CSI [tcl::string::index $leadernorm 0] bit param:$param" + switch -- [tcl::string::index $codenorm end] { + m { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set displaycode [ansistring VIEW $code] + append output ${whiteb}$displaycode$RST + } else { + set displaycode [ansistring VIEW $code] + if {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + #highlight the esc & leftbracket in white as indication there is a leading reset + set cposn [tcl::string::first ";" $displaycode] + append output ${whiteb}[tcl::string::range $displaycode 0 $cposn]$RST${greenb}[tcl::string::range $displaycode $cposn+1 end]$RST + } else { + append output ${greenb}$displaycode$RST + } + } + } + A - B { + #row move + set displaycode [ansistring VIEW $code] + set displaycode [tcl::string::map [list A "A$arrow_up" B "B$arrow_down"] $displaycode] + append output ${cyanb}$displaycode$RST + + } + C - D - G { + #set num [tcl::string::range $codenorm 4 end-1] + set displaycode [ansistring VIEW $code] + set displaycode [tcl::string::map [list C "C$arrow_right" D "D$arrow_left" G "G$arrow_lr"] $displaycode] + append output ${cyanb}$displaycode$RST + } + H - f { + set params [tcl::string::range $codenorm 4 end-1] + lassign [split $params {;}] row col + #lassign $matchinfo _match row col + set displaycode [ansistring VIEW $code] + if {$col eq ""} { + #row only move + set map [list H "H${arrow_lr}" f "f${arrow_lr}"] + } else { + #row and col move + set map [list H "H${arrow_lr}${arrow_du}" f "${arrow_lr}${arrow_du}"] + } + set displaycode [tcl::string::map $map $displaycode] + append output ${cyanb}$displaycode$RST + } + s { + append output ${blueb}[ansistring VIEW $code]$RST + } + u { + append output ${blueb_r}[ansistring VIEW $code]$RST + } + default { + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + } + } + 7GFX { + switch -- [tcl::string::index $codenorm 4] { + "0" { + append output ${GX}GX+$RST + } + "B" { + append output ${GX}GX-$RST + } + } + } + 7ESC { + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + default { + #if the code is a PM (or other encapsulation type code e.g terminated by ST) we want to see linefeeds as visual representation character + append output ${unk}[ansistring VIEW -lf 1 $code]$RST + } + } + + } + return $output + } + + method viewstyle {args} { + if {$o_string eq ""} { + return "" + } + if {![llength $o_ansisplits]} {my MakeSplit} + + #set splits [punk::ansi::ta::split_codes_single $string] + set output "" + set codestack [list] + set gx_stack [list] ;#not actually a stack + set cursor_saved "" + foreach {pt code} $o_ansisplits { + if {[llength $args]} { + set pt [ansistring VIEW {*}$args $pt] + } + append output [punk::ansi::codetype::sgr_merge_list {*}$codestack]$pt + if {$code ne ""} { + append output [a][ansistring VIEW -lf 1 $code] + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } elseif {[punk::ansi::codetype::is_sgr $code]} { + #basic simplification first.. straight dups + set dup_posns [lsearch -all -exact $codestack $code] ;#-exact because of square-bracket glob chars + #lremove not present in pre 8.7! + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } elseif {[regexp {\x1b7|\x1b\[s} $code]} { + #cursor_save + set cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$codestack] + } elseif {[regexp {\x1b8|\x1b\[u} $code]} { + #cursor_restore + set codestack [list $cursor_saved] + } else { + #leave SGR stack as is + if {[punk::ansi::codetype::is_gx_open $code]} { + set gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess + } elseif {[punk::ansi::codetype::is_gx_close $code]} { + set gx_stack [list] + } + } + } + } + return $output + + } + } +} +tcl::namespace::eval punk::ansi { + + proc stripansi3 {text} [string map [list $::punk::ansi::ta::re_ansi_split] { + + #using detect costs us a couple of uS - but saves time on plain text + #we should probably leave this for caller - otherwise it ends up being called more than necessary + #if {![::punk::ansi::ta::detect $text]} { + # return $text + #} + + #alternate graphics codes are not the norm + # - so save a few uS in the common case by only calling convert_g0 if we detect + if {[punk::ansi::ta::detect_g0 $text]} { + set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters + } + punk::ansi::ta::Do_split_at_codes_join $text {} + }] + + proc stripansiraw3 {text} [string map [list $::punk::ansi::ta::re_ansi_split] { + + #join [::punk::ansi::ta::split_at_codes $text] "" + punk::ansi::ta::Do_split_at_codes_join $text {} + }] +} + +tcl::namespace::eval punk::ansi::ansistring { + #*** !doctools + #[subsection {Namespace punk::ansi::ansistring}] + #[para]punk::ansi::ansistring ensemble - ansi-aware string operations + #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings + #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. + #[list_begin definitions] + + tcl::namespace::path [list ::punk::ansi ::punk::ansi::ta] + tcl::namespace::ensemble create + tcl::namespace::export length trim trimleft trimright INDEX COUNT VIEW VIEWCODES VIEWSTYLE INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX NEW + #todo - expose _splits_ methods so caller can work efficiently with the splits themselves + #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single + + #\UFFFD - replacement char or \U2426 + + #using ISO 2047 graphical representations of control characters - probably obsolete? + #00 NUL Null ⎕ U+2395 NU + #01 TC1, SOH Start of Heading ⌈ U+2308 SH + #02 TC2, STX Start of Text ⊥ U+22A5 SX + #03 TC3, ETX End of Text ⌋ U+230B EX + #04 TC4, EOT End of Transmission ⌁ U+2301[9] ET + #05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ + #06 TC6, ACK Acknowledge ✓ U+2713 AK + #07 BEL Bell ⍾ U+237E[9] BL + #08 FE0, BS Backspace ⤺ —[b] BS + #09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT + #0A FE2, LF Line Feed ≡ U+2261 LF + #0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT + #0C FE4, FF Form Feed ↡ U+21A1 FF + #0D FE5, CR Carriage Return ⪪ U+2AAA CR + #0E SO Shift Out ⊗ U+2297 SO + #0F SI Shift In ⊙ U+2299 SI + #10 TC7, DLE Data Link Escape ⊟ U+229F DL + #11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 + #12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 + #13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 + #14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 + #15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK + #16 TC9, SYN Synchronization ⎍ U+238D SY + #17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB + #18 CAN Cancel ⧖ U+29D6 CN + #19 EM End of Medium ⍿ U+237F[9] EM + #1A SUB Substitute Character ␦ U+2426[12] SB + #1B ESC Escape ⊖ U+2296 EC + #1C IS4, FS File Separator ◰ U+25F0 FS + #1D IS3, GS Group Separator ◱ U+25F1 GS + #1E IS2, RS Record Separator ◲ U+25F2 RS + #1F IS1 US Unit Separator ◳ U+25F3 US + #20 SP Space △ U+25B3 SP + #7F DEL Delete ▨ —[d] DT + + #C0 control code visual representations + # Code Val Name 2X Description + # 2400 00 NUL NU Symbol for Null + # 2401 01 SOH SH Symbol for Start of Heading + # 2402 02 STX SX Symbol for Start of Text + # 2403 03 ETX EX Symbol for End of Text + # 2404 04 EOT ET Symbol for End of Transmission + # 2405 05 ENQ EQ Symbol for Enquiry + # 2406 06 ACK AK Symbol for Acknowledge + # 2407 07 BEL BL Symbol for Bell + # 2409 09 BS BS Symbol for Backspace + # 2409 09 HT HT Symbol for Horizontal Tab (1) + # 240A 0A LF LF Symbol for Line Feed (1) + # 240B 0B VT VT Symbol for Vertical Tab (1) + # 240C 0C FF FF Symbol for Form Feed (2) + # 240D 0D CR CR Symbol for Carriage Return (1) + # 240E 0E SO SO Symbol for Shift Out + # 240F 0F SI SI Symbol for Shift In + # 2410 10 DLE DL Symbol for Data Link Escape + # 2411 11 DC1 D1 Symbol for Device Control 1 (2) + # 2412 12 DC2 D2 Symbol for Device Control 2 (2) + # 2413 13 DC3 D3 Symbol for Device Control 3 (2) + # 2414 14 DC4 D4 Symbol for Device Control 4 (2) + # 2415 15 NAK NK Symbol for Negative Acknowledge + # 2416 16 SYN SY Symbol for Synchronous Idle + # 2417 17 ETB EB Symbol for End of Transmission Block + # 2418 18 CAN CN Symbol for Cancel + # 2419 19 EM EM Symbol for End of Medium + # 241A 1A SUB SU Symbol for Substitute + # 241B 1B ESC EC Symbol for Escape + # 241C 1C FS FS Symbol for Field Separator (3) + # 241D 1D GS GS Symbol for Group Separator (3) + # 241E 1E RS RS Symbol for Record Separator (3) + # 241F 1F US US Symbol for Unit Separator (3) + # 2420 20 SP SP Symbol for Space (4) + # 2421 7F DEL DT Symbol for Delete (4) + + #C1 control code visual representations + #Code Val Name 2X Description + # 80 80 80 (1) + # 81 81 81 (1) + # E022 82 BPH 82 Symbol for Break Permitted Here (2) + # E023 83 NBH 83 Symbol for No Break Here (2) + # E024 84 IND IN Symbol for Index (3) + # E025 85 NEL NL Symbol for Next Line (4) + # E026 86 SSA SS Symbol for Start Selected Area + # E027 87 ESA ES Symbol for End Selected Area + # E028 88 HTS HS Symbol for Character Tabulation Set + # E029 89 HTJ HJ Symbol for Character Tabulation with Justification + # E02A 8A VTS VS Symbol for Line Tabulation Set + # E02B 8B PLD PD Symbol for Partial Line Forward + # E02C 8C PLU PU Symbol for Partial Line Backward + # E02D 8D RI RI Symbol for Reverse Line Feed + # E02E 8E SS2 S2 Symbol for Single Shift 2 + # E02F 8F SS3 S3 Symbol for Single Shift 3 + # E030 90 DCS DC Symbol for Device Control String + # E031 91 PU1 P1 Symbol for Private Use 1 + # E032 92 PU2 P2 Symbol for Private Use 2 + # E033 93 STS SE Symbol for Set Transmit State + # E034 94 CCH CC Symbol for Cancel Character + # E035 95 MW MW Symbol for Message Waiting + # E036 96 SPA SP Symbol for Start Protected (Guarded) Area + # E037 97 EPA EP Symbol for End Protected (Guarded) Area + # E038 98 SOS 98 Symbol for Start of String (2) + # 99 99 (1) + # E03A 9A SCI 9A Symbol for Single Character Introducer (2) + # E03B 9B CSI CS Symbol for Control Sequence Introducer (5) + # E03C 9C ST ST Symbol for String Terminator + # E03D 9D OSC OS Symbol for Operating System Command + # E03E 9E PM PM Symbol for Privacy Message + # E03F 9F APC AP Symbol for Application Program Command + + variable debug_visuals + #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) + + #Goal is not to map every control character? + #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the tcl::string::map directly + #ETX -ctrl-c + #EOT ctrl-d (EOF?) + #SYN ctrl-v + #SUB ctrl-z + #CAN ctrl-x + #FS ctrl-\ (SIGQUIT) + set visuals_interesting [tcl::dict::create\ + NUL [list \x00 \u2400]\ + ETX [list \x03 \u2403]\ + EOT [list \x04 \u2404]\ + BEL [list \x07 \u2407]\ + SYN [list \x16 \u2416]\ + CAN [list \x18 \u2418]\ + SUB [list \x1a \u241a]\ + FS [list \x1c \u241c]\ + SOS [list \x98 \ue038]\ + CSI [list \x9b \ue03b]\ + ST [list \x9c \ue03c]\ + PM [list \x9e \ue03e]\ + APC [list \x9f \ue03f]\ + ] + #it turns out we need pretty much everything for debugging + set visuals_c0 [tcl::dict::create\ + NUL [list \x00 \u2400]\ + SOH [list \x01 \u2401]\ + STX [list \x02 \u2402]\ + ETX [list \x03 \u2403]\ + EOT [list \x04 \u2404]\ + ENQ [list \x05 \u2405]\ + ACK [list \x06 \u2406]\ + BEL [list \x07 \u2407]\ + FF [list \x0c \u240c]\ + SO [list \x0e \u240e]\ + SF [list \x0f \u240f]\ + DLE [list \x10 \u2410]\ + DC1 [list \x11 \u2411]\ + DC2 [list \x12 \u2412]\ + DC3 [list \x13 \u2413]\ + DC4 [list \x14 \u2414]\ + NAK [list \x15 \u2415]\ + SYN [list \x16 \u2416]\ + ETB [list \x17 \u2417]\ + CAN [list \x18 \u2418]\ + EM [list \x19 \u2419]\ + SUB [list \x1a \u241a]\ + FS [list \x1c \u241c]\ + GS [list \x1d \u241d]\ + RS [list \x1e \u241e]\ + US [list \x1f \u241f]\ + DEL [list \x7f \u2421]\ + ] + #alternate symbols for space + # \u2422 Blank Symbol (b with forwardslash overly) + # \u2423 Open Box (square bracket facing up like a tray/box) + + # \u2424 Symbol for Newline (small "NL") + + # \u2425 Symbol for Delete Form Two (some sort of fat forward-slash thing) + + # \u2426 Symbol for Substitute Form Two (backwards question mark) + + #these are in the PUA (private use area) unicode block - seem to be rarely supported + #the unicode consortium has apparently neglected to provide separate visual representation codepoints for not only the c1 controls (some of which ARE still used e.g in sixels) but various other non-printing chars such as BOM + #The debugging/analysis usecase is an important one - surely moreso that some of the emoji stuff coming out of there. + #we'll hack in some stuff as needed - may override some of the visuals_c1 which is usually just empty/substitute glyphs + #Being repurposed - these could potentially be confused with actual characters depending on the debugging context + #To minimize potential confusion - we'll use a longer replacement sequence - which is not ideal from the perspective of terminal column layout debugging + #A single unique glyph would be better - although the bracketing for 8-bit codes is a useful visual indicator + #(review - BOM should use different brackets to c1?) + + #todo - regularly check if unicode has improved in this area - though with requests for c1 visuals dating back to at least 2011 - it's doubtful. + #for 8-bit controls - we will standardize on a fixed width of 4 bracketing with: + #\u2987 and \u2988 from Miscellaneous Mathematical Symbols-B (D or fractional-moon shaped brackets) + #\u2987 - Z Notation Left Image Bracket + #\u2988 - Z Notation Right Image Bracket + #selection of these is also based on them being seemingly reasonably widely available in fonts.. review + #my apologies if you're debugging z-notation strings! + #If only column's-worth of symbol/char needed between the brackets - pad with a space before the closing bracket + + #8-bit brackets + set ob8 \u2987; set cb8 \u2988 ;#z-notation image brackets + + #miscellaneous debug code brackets + set obm \u27e6 ;set cbm \u27e7 ;#square double brackets from Miscellaneous Mathematical Symbols-A + + #this private range so rarely supported in fonts - and visuals are unknown, so we will make up some 2-letter codes for now + #set visuals_c1 [tcl::dict::create\ + # BPH [list \x82 "${ob8}\ue022 $cb8"]\ + # NBH [list \x83 "${ob8}\ue023 $cb8"]\ + # IND [list \x84 "${ob8}\ue024 $cb8"]\ + # NEL [list \x85 "${ob8}\ue025 $cb8"]\ + # SSA [list \x86 "${ob8}\ue026 $cb8"]\ + # ESA [list \x87 "${ob8}\ue027 $cb8"]\ + # HTS [list \x88 "${ob8}\ue028 $cb8"]\ + # HTJ [list \x89 "${ob8}\ue029 $cb8"]\ + # VTS [list \x8a "${ob8}\ue02a $cb8"]\ + # PLD [list \x8b "${ob8}\ue02a $cb8"]\ + # PLU [list \x8c "${ob8}\ue02c $cb8"]\ + # RI [list \x8d "${ob8}\ue02d $cb8"]\ + # SS2 [list \x8e "${ob8}\ue02e $cb8"]\ + # SS3 [list \x8f "${ob8}\ue02f $cb8"]\ + # DCS [list \x90 "${ob8}\ue030 $cb8"]\ + # PU1 [list \x91 "${ob8}\ue031 $cb8"]\ + # PU2 [list \x92 "${ob8}\ue032 $cb8"]\ + # STS [list \x93 "${ob8}\ue033 $cb8"]\ + # CCH [list \x94 "${ob8}\ue034 $cb8"]\ + # MW [list \x95 "${ob8}\ue035 $cb8"]\ + # SPA [list \x96 "${ob8}\ue036 $cb8"]\ + # EPA [list \x97 "${ob8}\ue037 $cb8"]\ + # SOS [list \x98 "${ob8}\ue038 $cb8"]\ + # SCI [list \x9a "${ob8}\ue03a $cb8"]\ + # CSI [list \x9b "${ob8}\ue03b $cb8"]\ + # ST [list \x9c "${ob8}\ue03c $cb8"]\ + # OSC [list \x9d "${ob8}\ue03d $cb8"]\ + # PM [list \x9e "${ob8}\ue03e $cb8"]\ + # APC [list \x9f "${ob8}\ue03f $cb8"]\ + #] + + #these 2 letter codes only need to disambiguate within the c1 set - they're not great. + #these sit within the Latin-1 Supplement block + set visuals_c1 [tcl::dict::create\ + PAD [list \x80 "${ob8}PD$cb8"]\ + HOP [list \x81 "${ob8}HP$cb8"]\ + BPH [list \x82 "${ob8}BP$cb8"]\ + NBH [list \x83 "${ob8}NB$cb8"]\ + IND [list \x84 "${ob8}IN$cb8"]\ + NEL [list \x85 "${ob8}NE$cb8"]\ + SSA [list \x86 "${ob8}SS$cb8"]\ + ESA [list \x87 "${ob8}ES$cb8"]\ + HTS [list \x88 "${ob8}HS$cb8"]\ + HTJ [list \x89 "${ob8}HT$cb8"]\ + VTS [list \x8a "${ob8}VT$cb8"]\ + PLD [list \x8b "${ob8}PD$cb8"]\ + PLU [list \x8c "${ob8}PU$cb8"]\ + RI [list \x8d "${ob8}RI$cb8"]\ + SS2 [list \x8e "${ob8}S2$cb8"]\ + SS3 [list \x8f "${ob8}S3$cb8"]\ + DCS [list \x90 "${ob8}DC$cb8"]\ + PU1 [list \x91 "${ob8}P1$cb8"]\ + PU2 [list \x92 "${ob8}P2$cb8"]\ + STS [list \x93 "${ob8}SX$cb8"]\ + CCH [list \x94 "${ob8}CC$cb8"]\ + MW [list \x95 "${ob8}MW$cb8"]\ + SPA [list \x96 "${ob8}SP$cb8"]\ + EPA [list \x97 "${ob8}EP$cb8"]\ + SOS [list \x98 "${ob8}SO$cb8"]\ + SCI [list \x9a "${ob8}SC$cb8"]\ + CSI [list \x9b "${ob8}CS$cb8"]\ + ST [list \x9c "${ob8}ST$cb8"]\ + OSC [list \x9d "${ob8}OS$cb8"]\ + PM [list \x9e "${ob8}PM$cb8"]\ + APC [list \x9f "${ob8}AP$cb8"]\ + ] + + + set hack [tcl::dict::create] + tcl::dict::set hack BOM1 [list \uFEFF "${obm}\U1f4a3$cbm"] ;#byte order mark/ ZWNBSP (ZWNBSP usage generally deprecated) - a picture of a bomb(2wide glyph) + #review - other boms? Encoding dependent? + + tcl::dict::set hack DCS [list \x90 "${ob8}\u2328 $cb8"] ;#keyboard from Miscellaneous Technical - 1 wide + pad. + tcl::dict::set hack SOS [list \x98 "${ob8}\u2380 $cb8"] ;#Insertion Symbol from Miscellaneous Technical - 1 wide + pad + tcl::dict::set hack ST [list \x9c "${ob8}\u2383 $cb8"] ;#Emphasis Symbol from Miscellaneous Technical - 1 wide + pad (graphically related to \u2380) + tcl::dict::set hack CSI [list \x9b "${ob8}\u2386 $cb8"] ;#Enter Symbol from Miscellaneous Technical - 1 wide + pad + tcl::dict::set hack OSC [list \x9d "${ob8}\u2b55$cb8"] ;#bright red ring from Miscellaneous Symbols and Arrows - 2 wide (OSC could be used for clipboard or other potentially security sensitive functions) + tcl::dict::set hack PM [list \x9e "${ob8}PM$cb8"] + tcl::dict::set hack APC [list \x9f "${ob8}\U1f534$cb8"] ;#bright red ball from Miscellaneoust Symbols and Pictographs - 2 wide (APC also noted as a potential security risk) + + set debug_visuals [tcl::dict::merge $visuals_c0 $visuals_c1 $hack] + + #for repeated interaction with the same ANSI string - a mechanism to store state is more efficient + proc NEW {string} { + punk::ansi::class::class_ansistring new $string + } + proc VIEW {args} { + #*** !doctools + #[call [fun VIEW] [arg string]] + #[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets + #[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408') + #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions + #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. + + variable debug_visuals + + if {![llength $args]} { + return "" + } + + set string [lindex $args end] + set defaults [tcl::dict::create\ + -esc 1\ + -cr 1\ + -lf 0\ + -vt 0\ + -ht 1\ + -bs 1\ + -sp 1\ + ] + set argopts [lrange $args 0 end-1] + if {[llength $argopts] % 2 != 0} { + error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [tcl::dict::keys $defaults]" + } + set opts [tcl::dict::merge $defaults $argopts] + # -- --- --- --- --- + set opt_esc [tcl::dict::get $opts -esc] + set opt_cr [tcl::dict::get $opts -cr] + set opt_lf [tcl::dict::get $opts -lf] + set opt_vt [tcl::dict::get $opts -vt] + set opt_ht [tcl::dict::get $opts -ht] + set opt_bs [tcl::dict::get $opts -bs] + set opt_sp [tcl::dict::get $opts -sp] + # -- --- --- --- --- + + + + set visuals_opt $debug_visuals + if {$opt_esc} { + tcl::dict::set visuals_opt ESC [list \x1b \u241b] + } + if {$opt_cr} { + tcl::dict::set visuals_opt CR [list \x0d \u240d] + } + if {$opt_lf == 1} { + tcl::dict::set visuals_opt LF [list \x0a \u240a] + } + if {$opt_lf == 2} { + tcl::dict::set visuals_opt LF [list \x0a \u240a\n] + } + if {$opt_vt} { + tcl::dict::set visuals_opt VT [list \x0b \u240b] + } + if {$opt_ht} { + tcl::dict::set visuals_opt HT [list \x09 \u2409] + } + if {$opt_bs} { + tcl::dict::set visuals_opt BS [list \x08 \u2408] + } + if {$opt_sp} { + tcl::dict::set visuals_opt SP [list \x20 \u2420] + } + + #set visuals [tcl::dict::merge $visuals_opt $debug_visuals] + #set charmap [list] + #tcl::dict::for {nm chars} $visuals_opt { + # lappend charmap {*}$chars + #} + #return [tcl::string::map $charmap $string] + return [tcl::string::map [concat {*}[dict values $visuals_opt]] $string] + + + #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs + #return [tcl::string::map [list \033 \U2296 \007 \U237E] $string] + } + + #The implementation of viewcodes,viewstyle is more efficiently done in an object for the case where repeated calls of various methods can re-use the internal splits. + #for oneshots here - there is only minor overhead to use and destroy the object here. + proc VIEWCODES {args} { + set string [lindex $args end] + if {$string eq ""} { + return "" + } + set arglist [lrange $args 0 end-1] + set ansistr [ansistring NEW $string] + set result [$ansistr viewcodes {*}$arglist] + $ansistr destroy + return $result + } + #an attempt to show the codes and colour/style of the *input* + #ie we aren't looking at the row/column positioning - but we do want to keep track of cursor attribute saves and restores + proc VIEWSTYLE {args} { + set string [lindex $args end] + if {$string eq ""} { + return "" + } + set arglist [lrange $args 0 end-1] + set ansistr [ansistring NEW $string] + set result [$ansistr viewstyle {*}$arglist] + $ansistr destroy + return $result + } + + + #todo - change to COUNT to emphasize the difference between this and doing a Tcl string length on the ansistriped string! + #review. Tabs/elastic tabstops. Do we want to count a tab as one element? Probably so if we are doing so for \n etc and not counting 2W unicode. + #Consider leaving tab manipualation for a width function which determines columns occupied for all such things. + proc COUNT {string} { + #*** !doctools + #[call [fun COUNT] [arg string]] + #[para]Returns the count of visible graphemes and non-ansi control characters + #[para]Incomplete! grapheme clustering support not yet implemented - only diacritics are currently clustered to count as one grapheme. + #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. + #[para]This is not quite equivalent to calling string length on the result of ansistrip $string due to diacritics and/or grapheme combinations + #[para]Note that this returns the number of characters in the payload (after applying combiners) + #It is not always the same as the width of the string as rendered on a terminal due to 2wide Unicode characters and the usual invisible control characters such as \r and \n + #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. + + #stripping diacritics only makes sense if we are counting them as combiners and also treating unicode grapheme combinations as single entities. + #as Our ansistring INDEX function returns the character with diacritics, and will ultimately return grapheme clusters as a single element - we strip theme here as not counted. + #todo - combiners/diacritics? just map them away here? + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + set string [regsub -all $re_diacritics $string ""] + + #we want length to return number of glyphs.. not screen width. Has to be consistent with index function + tcl::string::length [ansistrip $string] + } + #included as a test/verification - slightly slower. + #grapheme split version may end up being used once it supports unicode grapheme clusters + proc count2 {string} { + #we want count to return number of glyphs.. not screen width. Has to be consistent with index function + return [llength [punk::char::grapheme_split [ansistrip $string]]] + } + + proc length {string} { + tcl::string::length [ansistrip $string] + } + + proc _splits_trimleft {sclist} { + set intext 0 + set outlist [list] + foreach {pt ansiblock} $sclist { + if {$ansiblock ne ""} { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" $ansiblock + } else { + lappend outlist [tcl::string::trimleft $pt] $ansiblock + set intext 1 + } + } else { + lappend outlist $pt $ansiblock + } + } else { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" + } else { + lappend outlist [tcl::string::trimleft $pt] + set intext 1 + } + } else { + lappend outlist $pt + } + } + } + return $outlist + } + proc _splits_trimright {sclist} { + set intext 0 + set outlist [list] + #we need to account for empty ansiblock var caused by dual-var iteration over odd length list + foreach {pt ansiblock} [lreverse $sclist] { + if {$ansiblock ne ""} { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" $ansiblock + } else { + lappend outlist [tcl::string::trimright $pt] $ansiblock + set intext 1 + } + } else { + lappend outlist $pt $ansiblock + } + } else { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + lappend outlist "" + } else { + lappend outlist [tcl::string::trimright $pt] + set intext 1 + } + } else { + lappend outlist $pt + } + } + } + return [lreverse $outlist] + } + + proc _splits_trim {sclist} { + return [_splits_trimright [_splits_trimleft $sclist]] + } + + #Note that trim/trimleft/trimright will trim spaces at the extremities that are styled with background colour, underline etc + #that may be unexpected, but it's probably the only thing that makes sense. Plain string trim can chop off whitespace that is extraneous to the ansi entirely. + proc trimleft {string args} { + set intext 0 + set out "" + #for split_codes only first or last pt can be empty string - but we can also get an empty ansiblock by using foreach with 2 vars on an odd-length list + foreach {pt ansiblock} [split_codes $string] { + if {!$intext} { + if {$pt eq "" || [regexp {^\s+$} $pt]} { + append out $ansiblock + } else { + append out [tcl::string::trimleft $pt]$ansiblock + set intext 1 + } + } else { + append out $pt$ansiblock + } + } + return $out + } + proc trimright {string} { + if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing + set rtrimmed_list [_splits_trimright [split_codes $string]] + return [join $rtrimmed_list ""] + } + proc trim {string} { + #make sure we do our ansi-scanning split only once - so use list-based trim operations + #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length + #we save a single function call by calling both here rather than _splits_trim + join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" + } + + #Capitalised because it's the clustered grapheme/controlchar index - not the tcl string index + proc INDEX {string index} { + #*** !doctools + #[call [fun index] [arg string] [arg index]] + #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) + #[para]Returns the character (with applied ansi effect) at position index + #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. + #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) + #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. + #[para]If the caller wants just the character - they should use a normal string index after calling ansistrap, or call ansistrip afterwards. + #[para]As any operation using end-+ will need to strip ansi to precalculate the length anyway; the caller should probably just use ansistrip and standard string index if the ansi coded output isn't required and they are using and end-based index. + #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using ansistrip and normal string operations on that. + #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code + #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. + #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. + #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible + #[para]Notes: + #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index + #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. + #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal + + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + + #todo - end-x +/-x+/-x etc + set original_index $index + + set index [tcl::string::map [list _ ""] $index] + #short-circuit some trivial cases + if {[tcl::string::is integer -strict $index]} { + if {$index < 0} {return ""} + #this only short-circuits an index greater than length including ansi-chars + #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length + if {$index > [tcl::string::length $string]} {return ""} + } else { + if {[tcl::string::match end* $index]} { + #for end- we will probably have to blow a few cycles stripping first and calculate the length + if {$index ne "end"} { + set op [tcl::string::index $index 3] + set offset [tcl::string::range $index 4 end] + if {$op ni {+ -} || ![tcl::string::is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$op eq "+" && $offset != 0} { + return "" + } + } else { + set offset 0 + } + #by now, if op = + then offset = 0 so we only need to handle the minus case + set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal + if {$offset == 0} { + set index [expr {$payload_len-1}] + } else { + set index [expr {($payload_len-1) - $offset}] + } + if {$index < 0} { + #don't waste time splitting and looping the string + return "" + } + } else { + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string + if {[tcl::string::is integer -strict $tail]} { + #plain +- + if {$op eq "-"} { + #return nothing for negative indices as per Tcl's lindex etc + return "" + } + set index $tail + } else { + if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { + if {[tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } + } + } + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set pt_index -2 + set pt_found -1 + set char "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] + #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go + #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + foreach {pt code} $ansisplits { + incr pt_index 2 + #we want an index per grapheme - whether it is doublewide or single + + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set low [expr {$high + 1}] ;#last high + #incr high [tcl::string::length $pt] + incr high [llength $graphemes] + } + + if {$pt ne "" && ($index >= $low && $index <= $high)} { + set pt_found $pt_index + #set char [tcl::string::index $pt $index-$low] + set char [lindex $graphemes $index-$low] + break + } + + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #we can throw away previous codestack + set codestack [list] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + #may have partial resets + #sgr_merge_list will handle at end + #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + #Review - consider if any other types of code make sense to retain in the output in this context. + if {[punk::ansi::codetype::is_sgr $code]} { + lappend codestack $code + } + } + + } + if {$pt_found >= 0} { + return [punk::ansi::codetype::sgr_merge_list {*}$codestack]$char + } else { + return "" + } + } + + #helper to convert indices (possibly of form x+y end-x etc) to numeric values within the payload range i.e without ansi + #return empty string for each index that is out of range + #review - this is possibly too slow to be very useful as is. + # consider converting to oo and maintaining state of ansisplits so we don't repeat relatively expensive operations for same string + #see also punk::lindex_resolve / punk::lindex_get for ways to handle tcl list/string indices without parsing them. + proc INDEXABSOLUTE {string args} { + set payload_len -1 ;# -1 as token to indicate we haven't calculated it yet (only want to call it once at most) + set testindices [list] + foreach index $args { + if {[tcl::string::is integer -strict $index]} { + if {$index < 0} { + lappend testindices "" + } elseif {$index > [tcl::string::length $string]} { + #this only short-circuits an index greater than length including ansi-chars + #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length + lappend testindices "" + } else { + lappend testindices $index + } + } else { + if {[tcl::string::match end* $index]} { + #for end- we will probably have to blow a few cycles stripping first and calculate the length + if {$index ne "end"} { + set op [tcl::string::index $index 3] + set offset [tcl::string::range $index 4 end] + if {$op ni {+ -} || ![tcl::string::is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$op eq "+" && $offset != 0} { + lappend testindices "" + continue + } + } else { + set offset 0 + } + #by now, if op = + then offset = 0 so we only need to handle the minus case + if {$payload_len == -1} { + set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal + } + if {$offset == 0} { + set index [expr {$payload_len-1}] + } else { + set index [expr {($payload_len-1) - $offset}] + } + if {$index < 0} { + lappend testindices "" + } else { + lappend testindices $index + } + } else { + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string + if {[tcl::string::is integer -strict $tail]} { + #plain +- + if {$op eq "-"} { + #return nothing for negative indices as per Tcl's lindex etc + lappend indices "" + continue + } + set index $tail + lappend testindices $index + } else { + if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { + if {[tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + lappend testindices $index + } + } + } + #assertion - we made exactly one append to testindices if there was no error + } + #we now have numeric or empty string indices - but haven't fully checked they are within the underlying payload length + + if {[join $testindices ""] eq ""} { + #don't calc ansistring length if no indices to check + return $testindices + } + if {$payload_len == -1} { + set payload_len [punk::ansi::ansistring::length $string] + } + set indices [list] + foreach ti $testindices { + if {$ti ne ""} { + if {$ti < $payload_len} { + lappend indices $ti + } else { + lappend indices "" + } + } else { + lappend indices "" + } + } + return $indices + + } + + #Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc. + #The column/row concept works for an ansistring that has been 'rendered' to some defined area. + #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows. + + #return pair of column extents occupied by the character index supplied. + #single-width grapheme will return pair of integers of equal value + #doulbe-width grapheme will return a pair of consecutive indices + proc INDEXCOLUMNS {string idx} { + #There is an index per grapheme - whether it is 1 or 2 columns wide + set index [lindex [INDEXABSOLUTE $string $idx] 0] + if {$index eq ""} { + return "" + } + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + set low -1 ;#low and high grapheme indexes + set high -1 + set lowc 0 ;#low and high column (1 based) + set highc 0 + set col1 "" + set col2 "" + set row 1 + foreach {pt code} $ansisplits { + if {$pt ne ""} { + set ptlines [split $pt \n] + set ptlinecount [llength $ptlines] + set ptlineindex 0 + foreach ptline $ptlines { + set graphemes [punk::char::grapheme_split $ptline] + if {$ptlineindex > 0} { + #todo - account for previous \n as a grapheme .. what column? It should theoretically be in the rightmost column + #zero width + set low [expr {$high + 1}] + set lowc [expr {$highc + 1}] + set high $low + set highc $lowc + if {$index == $low} { + set char \n + set col1 $lowc + set col2 $col1 + break + } + incr row + set lowc 0 + set highc 0 + } + set low [expr {$high + 1}] ;#last high + set lowc [expr {$highc + 1}] + set high [expr {$low + [llength $graphemes] -1}] + set highc [expr {$lowc + [punk::char::ansifreestring_width $ptline] -1}] + #puts "---row:$row lowc:$lowc highc:$highc $ptline graphemes:$graphemes" + if {$index >= $low && $index <= $high} { + set char [lindex $graphemes $index-$low] + set prefix [join [lrange $graphemes 0 [expr {$index-$low-1}]] ""] + set prefixlen [punk::char::ansifreestring_width $prefix] + set col1 [expr {$lowc + $prefixlen}] + set gwidth [punk::char::ansifreestring_width $char] + if {$gwidth < 1} { + puts stderr "ansistring INDEXCOLUMNS warning - grapheme width zero at column $col1 ??" + return "" ;#grapheme doesn't occupy a column and isn't a newline? - review + } + set col2 [expr {$col1 + ($gwidth -1)}] + break + } + incr ptlineindex + } + } + } + if {$col1 ne "" & $col2 ne ""} { + return [list $col1 $col2] + } + } + + #multiple rows - return a list? + #return the grapheme index that occupies column col (could be first or second half of 2-wide grapheme) + proc COLUMNINDEX {string col} { + + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + set lowindex -1 ;#low and high grapheme indexes + set highindex -1 + set lowc 0 ;#low and high column (1 based) + set highc 0 + set col1 "" + set col2 "" + foreach {pt code} $ansisplits { + if {$pt ne ""} { + if {[tcl::string::last \n $pt] < 0} { + set graphemes [punk::char::grapheme_split $pt] + set lowindex [expr {$highindex + 1}] ;#last high + set lowc [expr {$highc + 1}] + set highindex [expr {$lowindex + [llength $graphemes] -1}] + set highc [expr {$lowc + [punk::char::ansifreestring_width $pt] -1}] + if {$col >= $lowc && $col <= $highc} { + if {$col == $lowc} { + return $lowindex + } elseif {$col == $highc} { + return $highindex + } + set index [expr {$lowindex -1}] + set str "" + foreach g $graphemes { + incr index + append str $g + set width [punk::char::ansifreestring_width $str] + if {$lowc-1 + $width >= $col} { + return $index + } + } + error "ansistring COLUMNINDEX '$string' $col not found" ;#assertion - shouldn't happen + } + } else { + error "ansistring COLUMNINDEX multiline not implemented" + } + } + } + } + + #inserting into global namespace like this should be kept to a minimum.. but this is considered a core aspect of punk::ansi + #todo - document + interp alias {} ansistring {} ::punk::ansi::ansistring + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] +} + +tcl::namespace::eval punk::ansi::internal { + proc splitn {str {len 1}} { + #from textutil::split::splitn + if {$len <= 0} { + return -code error "len must be > 0" + } + if {$len == 1} { + return [split $str {}] + } + set result [list] + set max [tcl::string::length $str] + set i 0 + set j [expr {$len -1}] + while {$i < $max} { + lappend result [tcl::string::range $str $i $j] + incr i $len + incr j $len + } + return $result + } + proc splitx {str {regexp {[\t \r\n]+}}} { + #from textutil::split::splitx + # Bugfix 476988 + if {$str eq ""} { + return {} + } + if {$regexp eq ""} { + return [::split $str ""] + } + if {[regexp $regexp {}]} { + return -code error \ + "splitting on regexp \"$regexp\" would cause infinite loop" + } + set list {} + set start 0 + while {[regexp -start $start -indices -- $regexp $str match submatch]} { + #foreach {subStart subEnd} $submatch break + lassign $submatch subStart subEnd + #foreach {matchStart matchEnd} $match break + lassign $match matchStart matchEnd + incr matchStart -1 + incr matchEnd + lappend list [tcl::string::range $str $start $matchStart] + if {$subStart >= $start} { + lappend list [tcl::string::range $str $subStart $subEnd] + } + set start $matchEnd + } + lappend list [tcl::string::range $str $start end] + return $list + } + + proc printing_length_addchar {i c} { + upvar outchars outc + upvar outsizes outs + set nxt [llength $outc] + if {$i < $nxt} { + lset outc $i $c + } else { + lappend outc $c + } + } + + #string to 2digit hex - e.g used by XTGETTCAP + proc str2hex {input} { + set 2hex "" + foreach ch [split $input ""] { + append 2hex [format %02X [scan $ch %c]] + } + return $2hex + } + proc hex2str {2digithexchars} { + set 2digithexchars [tcl::string::map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) + if {$2digithexchars eq ""} { + return "" + } + if {[tcl::string::length $2digithexchars] % 2 != 0} { + error "hex2str requires an even number of hex digits (2 per character)" + } + set 2str "" + foreach pair [splitn $2digithexchars 2] { + append 2str [format %c 0x$pair] + } + return $2str + } +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::ansi [tcl::namespace::eval punk::ansi { + variable version + set version 0.1.1 +}] +return + + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm new file mode 100644 index 00000000..5e270ac8 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.1.0.tm @@ -0,0 +1,1537 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::args 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::args 0 0.1.0] +#[copyright "2024"] +#[titledesc {args parsing}] [comment {-- Name section and table of contents description --}] +#[moddesc {args to nested dict of opts and values}] [comment {-- Description at end of page heading --}] +#[require punk::args] +#[keywords module proc args arguments parse] +#[description] +#[para]Utilities for parsing proc args + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] There are many ways to parse arguments and many (too many?) packages to do it (see below for a discussion of packages and pure-tcl mechanisms). +#[para] overview of punk::args +#[subsection Concepts] +#[para]There are 2 main conventions for parsing a proc args list +#[list_begin enumerated] +#[enum] +#[para]leading option-value pairs and flags followed by a list of values (Tcl style) +#[enum] +#[para]leading list of values followed by option-value pairs and flags (Tk style) +#[list_end] +#[para]There are exceptions in both Tcl and Tk commands regarding this ordering +#[para]punk::args is focused on the 1st convention (Tcl style): parsing of the 'args' variable in leading option-value pairs (and/or solo flags) style +#[para]The proc can still contain some leading required values e.g [example "proc dostuff {arg1 arg2 args} {...}}"] +#[para]but having the core values elements at the end of args is arguably more generally useful - especially in cases where the number of trailing values is unknown and/or the proc is to be called in a functional 'pipeline' style. +#[para] +#[para]The basic principle is that a call to punk::args::get_dict is made near the beginning of the proc with a cacheable first argument defining the parameters e.g +#[example { +# proc dofilestuff {args} { +# lassign [dict values [punk::args::get_dict { +# *proc -help "do some stuff with files e.g dofilestuff " +# *opts -type string +# #comment lines ok +# -directory -default "" +# -translation -default binary +# #setting -type none indicates a flag that doesn't take a value (solo flag) +# -nocomplain -type none +# *values -min 1 -max -1 +# } $args]] opts values +# +# puts "translation is [dict get $opts -translation]" +# foreach f [dict values $values] { +# puts "doing stuff with file: $f" +# } +# } +#}] +#[para]The lines beginning with * are optional in most cases and can be used to set defaults and some extra controls +#[para] - the above example would work just fine with only the - lines, but would allow zero filenames to be supplied as no -min value is set for *values +#[para]valid * lines being with *proc *opts *values +#[para]lines beginning with a dash define options - a name can optionally be given to each trailing positional argument. +#[para]If no names are defined for positional arguments, they will end up in the values key of the dict with numerical keys starting at zero. +#[para]e.g the result from the punk::args call above may be something like: +#[para] opts {-translation binary -directory "" -nocomplain 0} values {0 file1.txt 1 file2.txt 2 file3.txt} +#[para]Here is an example that requires the number of values supplied to be exactly 2 and names the positional arguments +#[para]It also demonstrates an inital argument 'category' that is outside of the scope for punk::args processing - allowing leading and trailing positional arguments +#[example { +# proc dofilestuff {category args} { +# lassign [dict values [punk::args::get_dict { +# -directory -default "" +# -translation -default binary +# -nocomplain -type none +# *values -min 2 -max 2 +# fileA -type existingfile 1 +# fileB -type existingfile 1 +# } $args]] opts values +# puts "$category fileA: [dict get $values fileA]" +# puts "$category fileB: [dict get $values fileB]" +# } +#}] +#[para]By using standard tcl proc named arguments prior to args, and setting *values -min 0 -max 0 +#[para]a Tk-style ordering can be acheived, where punk::args is only handling the trailing flags and the values element of the returned dict can be ignored +#[para]This use of leading positional arguments means the type validation features can't be applied to them. It can be done manually as usual, +#[para] or an additional call could be made to punk::args e.g +#[example { +# punk::args::get_dict { +# category -choices {cat1 cat2 cat3} +# another_leading_arg -type boolean +# } [list $category $another_leading_arg] +#}] + +#*** !doctools +#[subsection Notes] +#[para]For internal functions not requiring features such as solo flags, prefix matching, type checking etc - a well crafted switch statement will be the fastest pure-tcl solution. +#[para] +#When functions are called often and/or in inner loops, a switch based solution generally makes the most sense. +#For functions that are part of an API a package may be more suitable. +#[para]The following example shows a switch-based solution that is highly performant (sub microsecond for the no-args case) +#[example { +# proc test_switch {args} { +# set opts [dict create\\ +# -return "object"\\ +# -frametype "heavy"\\ +# -show_edge 1\\ +# -show_seps 0\\ +# -x a\\ +# -y b\\ +# -z c\\ +# -1 1\\ +# -2 2\\ +# -3 3\\ +# ] +# foreach {k v} $args { +# switch -- $k { +# -return - -show_edge - -show_seps - -frametype - -x - -y - -z - -1 - -2 - -3 { +# dict set opts $k $v +# } +# default { +# error "unrecognised option '$k'. Known options [dict keys $opts]" +# } +# } +# } +# return $opts +# } +#}] +#[para]Note that the switch statement uses literals so that the compiler produces a jump-table for best performance. +#[para] +# Attempting to build the switch branch using the values from dict keys $opts will stop the jump table being built. +# To create the faster switch statement without repeating the key names, the proc body would need to be built using string map. +#[para]use punk::lib::show_jump_tables to verify that a jump table exists. +#[para]Nearly as performant due to the c-coded tcl::prefix::match function built into Tcl is the following example - which also allows shortened option names if they are unambiguous +#[example { +# proc test_prefix {args} { +# set opts [dict create\ +# -return string\ +# -frametype \uFFEF\ +# -show_edge \uFFEF\ +# -show_seps \uFFEF\ +# -x a\ +# -y b\ +# -z c\ +# -1 1\ +# -2 2\ +# -3 3\ +# ] +# if {[llength $args]} { +# set knownflags [dict keys $opts] +# } +# foreach {k v} $args { +# dict set opts [tcl::prefix::match -message "test_prefix option $k" $knownflags $k] $v +# } +# return $opts +# } +#}] +#[para]There are many alternative args parsing packages a few of which are listed here. +#[list_begin enumerated] +#[enum]argp (pure tcl) +#[enum]parse_args (c implementation) +#[enum]argparse (pure tcl *) +#[enum]cmdline (pure tcl) +#[enum]opt (pure tcl) distributed with Tcl but considered deprecated +#[enum]The tcllib set of TEPAM modules (pure tcl) +#[para]TEPAM requires an alternative procedure declaration syntax instead of proc - but has support for Tk and documentation generation. +#[list_end] +#[para] (* c implementation planned/proposed) +#[para]punk::args was designed initially without specific reference to TEPAM - and to handle some edge cases in specific projects where TEPAM wasn't suitable. +#[para]In subsequent revisions of punk::args - some features were made to operate in a way that is similar to TEPAM - to avoid gratuitous differences where possible, but of course there are differences +#[para]and those used TEPAM or mixing TEPAM and punk::args should take care to assess the differences. +#[para]TEPAM is a mature solution and is widely available as it is included in tcllib. +#[para]Serious consideration should be given to using TEPAM or one of the other packages, if suitable for your project. +#[para]punk::args is relatively performant for a pure-tcl package solution - with the parsing of the argument specification block occuring only on the first run - after which a cached version of the spec is used. +#[para]punk::args is not limited to procs. It can be used in apply or coroutine situations for example. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +#All ensemble commands are slower in a safe interp as they aren't compiled the same way +#https://core.tcl-lang.org/tcl/tktview/1095bf7f75 +#as this is needed in safe interps too, and the long forms tcl::dict::for tcl::string::map are no slower in a normal interp - we use the long form here. +#(As at 2024-06 There are many tcl8.6/8.7 interps in use which are affected by this and it's unknown when/whether it will be fixed) +#ensembles: array binary clock dict info namespace string +#possibly file too, although that is generally hidden/modified in a safe interp +#chan,encoding always seems to use invokeStk1 anyway - yet there are performance improvements using ::tcl::chan::names etc +#interestingly in tcl8.7 at least - tcl::namespace::eval $somens $somescript is slightly faster than namespace eval even in unsafe interp + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::args +#[list_begin itemized] +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::args::class { + #*** !doctools + #[subsection {Namespace punk::args::class}] + #[para] class definitions + if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::args { + tcl::namespace::export {[a-z]*} + variable argspec_cache + variable argspecs + variable id_counter + set argspec_cache [tcl::dict::create] + set argspecs [tcl::dict::create] + set id_counter 0 + + #*** !doctools + #[subsection {Namespace punk::args}] + #[para] Core API functions for punk::args + #[list_begin definitions] + + + #todo? -synonym ? (applies to opts only not values) + #e.g -background -synonym -bg -default White + + proc Get_argspecs {optionspecs args} { + variable argspec_cache + variable argspecs + variable initial_optspec_defaults + variable initial_valspec_defaults + #ideally we would use a fast hash algorithm to produce a short key with low collision probability. + #something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. + #review - check if there is a built-into-tcl way to do this quickly + #for now we will just key using the whole string + set cache_key $optionspecs + if {[tcl::dict::exists $argspec_cache $cache_key]} { + return [tcl::dict::get $argspec_cache $cache_key] + } + + set optionspecs [tcl::string::map [list \r\n \n] $optionspecs] + #probably faster to inline a literal dict create in the proc than to use a namespace variable + set optspec_defaults [tcl::dict::create\ + -type string\ + -optional 1\ + -allow_ansi 1\ + -validate_without_ansi 0\ + -strip_ansi 0\ + -nocase 0\ + -multiple 0\ + ] + set valspec_defaults [tcl::dict::create\ + -type string\ + -optional 0\ + -allow_ansi 1\ + -validate_without_ansi 0\ + -strip_ansi 0\ + -multiple 0\ + ] + + #checks with no default + #-minlen -maxlen -range + + + #default -allow_ansi to 1 and -validate_without_ansi to 0 and -strip_ansi 0 - it takes time to strip ansi + #todo - detect if anything in the spec uses -allow_ansi 0, -validate_without_ansi 1 or -strip_ansi 1 and set a flag indicating if punk::ansi::ta::detect should be run on the argslist + set opt_required [list] + set val_required [list] + set arg_info [tcl::dict::create] + set arg_checks [tcl::dict::create] + set opt_defaults [tcl::dict::create] + set opt_names [list] ;#defined opts + set val_defaults [tcl::dict::create] + set opt_solos [list] + #first process dashed and non-dashed argspecs without regard to whether non-dashed are at the beginning or end + set val_names [list] + + set records [list] + set linebuild "" + + set linelist [split $optionspecs \n] + set lastindent "" + foreach ln $linelist { + if {[tcl::string::trim $ln] eq ""} {continue} + regexp {(\s*).*} $ln _all lastindent + break ;#break at first non-empty + } + #puts "indent1:[ansistring VIEW $lastindent]" + set in_record 0 + foreach rawline $linelist { + set recordsofar [tcl::string::cat $linebuild $rawline] + #ansi colours can stop info complete from working (contain square brackets) + if {![tcl::info::complete [punk::ansi::ansistrip $recordsofar]]} { + #append linebuild [string trimleft $rawline] \n + if {$in_record} { + if {[tcl::string::length $lastindent]} { + #trim only the whitespace corresponding to last indent - not all whitespace on left + if {[tcl::string::first $lastindent $rawline] == 0} { + set trimmedline [tcl::string::range $rawline [tcl::string::length $lastindent] end] + append linebuild $trimmedline \n + } else { + append linebuild $rawline \n + } + } else { + append linebuild $rawline \n + } + } else { + set in_record 1 + regexp {(\s*).*} $rawline _all lastindent + #puts "indent: [ansistring VIEW -lf 1 $lastindent]" + #puts "indent from rawline:$rawline " + append linebuild $rawline \n + } + } else { + set in_record 0 + if {[tcl::string::length $lastindent]} { + #trim only the whitespace corresponding to last indent - not all whitespace on left + if {[tcl::string::first $lastindent $rawline] == 0} { + set trimmedline [tcl::string::range $rawline [tcl::string::length $lastindent] end] + append linebuild $trimmedline + } else { + append linebuild $rawline + } + } else { + append linebuild $rawline + } + lappend records $linebuild + set linebuild "" + } + } + set proc_info {} + set opt_any 0 + set val_min 0 + set val_max -1 ;#-1 for no limit + set spec_id "" + foreach ln $records { + set trimln [tcl::string::trim $ln] + switch -- [tcl::string::index $trimln 0] { + "" - # {continue} + } + set linespecs [lassign $trimln argname] + if {$argname ne "*id" && [llength $linespecs] %2 != 0} { + error "punk::args::get_dict - bad optionspecs line for record '$argname' Remaining items on line must be in paired option-value format - received '$linespecs'" + } + set firstchar [tcl::string::index $argname 0] + set secondchar [tcl::string::index $argname 1] + if {$firstchar eq "*" && $secondchar ne "*"} { + set starspecs $linespecs + switch -- [tcl::string::range $argname 1 end] { + id { + #id line must have single entry - a unique id assigned by the user - an id will be allocated if no id line present or the value is "auto" + if {[llength $starspecs] != 1} { + error "punk::args::Get_argspecs - *id line must have a single entry following *id." + } + if {$spec_id ne ""} { + #disallow duplicate *id line + error "punk::args::Get_argspecs - *id already set. Existing value $spec_id" + } + set spec_id $starspecs + } + proc { + #allow arbitrary - review + set proc_info $starspecs + } + opts { + foreach {k v} $starspecs { + switch -- $k { + -any - + -anyopts { + set opt_any $v + } + -minlen - -maxlen - -range - -choices - -choicelabels { + #review - only apply to certain types? + tcl::dict::set optspec_defaults $k $v + } + -nominlen - -nomaxlen - -norange - -nochoices - -nochoicelabels { + if {$v} { + tcl::dict::unset optspec_defaults $k + } + } + -type { + switch -- $v { + int - integer { + set v int + } + char - character { + set v char + } + bool - boolean { + set v bool + } + dict - dictionary { + set v dict + } + none - any - ansistring { + + } + list { + + } + default { + #todo - disallow unknown types unless prefixed with custom- + } + } + tcl::dict::set optspec_defaults $k $v + } + -optional - + -allow_ansi - + -validate_without_ansi - + -strip_ansi - + -multiple { + #allow overriding of defaults for options that occur later + tcl::dict::set optspec_defaults $k $v + } + default { + set known { -any -anyopts -minlen -maxlen -range -choices -choicelabels\ + -nominlen -nomaxlen -norange -nochoices -nochoicelabels\ + -type -optional -allow_ansi -validate_without_ansi -strip_ansi -multiple\ + } + error "punk::args::Get_argspecs - unrecognised key '$k' in *opts line. Known keys: $known" + } + } + } + } + values { + foreach {k v} $starspecs { + switch -- $k { + -min - + -minvalues { + set val_min $v + } + -max - + -maxvalues { + set val_max $v + } + -minlen - -maxlen - -range - -choices - -choicelabels { + #review - only apply to certain types? + tcl::dict::set valspec_defaults $k $v + } + -nominlen - -nomaxlen - -norange - -nochoices - -nochoicelabels { + if {$v} { + tcl::dict::unset valspec_defaults $k + } + } + -type { + switch -- $v { + int - integer { + set v int + } + char - character { + set v char + } + bool - boolean { + set v bool + } + dict - dictionary { + set v dict + } + list { + + } + default { + #todo - disallow unknown types unless prefixed with custom- + } + } + tcl::dict::set valspec_defaults $k $v + } + -optional - + -allow_ansi - + -validate_without_ansi - + -strip_ansi - + -multiple { + tcl::dict::set valspec_defaults $k $v + } + default { + set known { -min -minvalues -max -maxvalues\ + -minlen -maxlen -range -choices -choicelabels\ + -nominlen -nomaxlen -norange -nochoices -nochoicelabels\ + -type -optional -allow_ansi -validate_without_ansi -strip_ansi -multiple\ + } + error "punk::args::Get_argspecs - unrecognised key '$k' in *values line. Known keys: $known" + } + } + } + + } + default { + error "punk::args::Get_argspecs - unrecognised * line in '$ln'. Expected *proc *opts or *values - use **name if paramname needs to be *name" + } + } + continue + } elseif {$firstchar eq "-"} { + set argspecs $linespecs + tcl::dict::set argspecs -ARGTYPE option + lappend opt_names $argname + set is_opt 1 + } else { + if {$firstchar eq "*"} { + #allow basic ** escaping for literal argname that begins with * + set argname [tcl::string::range $argname 1 end] + } + set argspecs $linespecs + tcl::dict::set argspecs -ARGTYPE value + lappend val_names $argname + set is_opt 0 + } + #assert - we only get here if it is a value or flag specification line. + #assert argspecs has been set to the value of linespecs + if {$is_opt} { + set spec_merged $optspec_defaults + } else { + set spec_merged $valspec_defaults + } + foreach {spec specval} $argspecs { + #literal-key switch - bytecompiled to jumpTable + switch -- $spec { + -type { + #normalize here so we don't have to test during actual args parsing in main function + switch -- [tcl::string::tolower $specval] { + int - integer { + tcl::dict::set spec_merged -type int + } + bool - boolean { + tcl::dict::set spec_merged -type bool + } + char - character { + tcl::dict::set spec_merged -type char + } + dict - dictionary { + tcl::dict::set spec_merged -type dict + } + "" - none { + if {$is_opt} { + tcl::dict::set spec_merged -type none + if {[tcl::dict::exists $specval -optional] && [tcl::dict::get $specval -optional]} { + tcl::dict::set spec_merged -default 0 ;#-default 0 can still be overridden if -default appears after -type - we'll allow it. + } + lappend opt_solos $argname + } else { + #-solo only valid for flags + error "punk::args::get_dict - invalid -type 'none' for positional argument positional argument '$argname'" + } + } + any - ansistring { + tcl::dict::set spec_merged -type any + } + default { + #allow custom unknown types through for now. Todo - require unknown types to begin with custom- REVIEW + tcl::dict::set spec_merged -type [tcl::string::tolower $specval] + } + } + } + -default - -solo - -range - -choices - -choicelabels - -minlen - -maxlen - -nocase - -optional - -multiple - -validate_without_ansi - -allow_ansi - -strip_ansi - -help - -ARGTYPE { + #review -solo 1 vs -type none ? + tcl::dict::set spec_merged $spec $specval + } + default { + set known_argspecs [list -default -type -range -choices -nocase -optional -multiple -validate_without_ansi -allow_ansi -strip_ansi -help] + error "punk::args::get_dict - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs" + } + } + } + set argspecs $spec_merged + if {$is_opt} { + set argchecks [tcl::dict::remove $argspecs -type -default -multiple -strip_ansi -validate_without_ansi -allow_ansi] ;#leave things like -range -minlen + } else { + set argchecks [tcl::dict::remove $argspecs -type -default -multiple -strip_ansi -validate_without_ansi -allow_ansi] ;#leave things like -range -minlen + } + tcl::dict::set arg_info $argname $argspecs + tcl::dict::set arg_checks $argname $argchecks + #review existence of -default overriding -optional + if {![tcl::dict::get $argspecs -optional] && ![tcl::dict::exists $argspecs -default]} { + if {$is_opt} { + lappend opt_required $argname + } else { + lappend val_required $argname + } + } + if {[tcl::dict::exists $argspecs -default]} { + if {$is_opt} { + tcl::dict::set opt_defaults $argname [tcl::dict::get $argspecs -default] + } else { + tcl::dict::set val_defaults $argname [tcl::dict::get $argspecs -default] + } + } + } + + #confirm any valnames before last don't have -multiple key + foreach valname [lrange $val_names 0 end-1] { + if {[tcl::dict::get $arg_info $valname -multiple]} { + error "bad key -multiple on argument spec for '$valname'. Only the last value argument specification can be marked -multiple" + } + } + if {$spec_id eq "" || [tcl::string::tolower $spec_id] eq "auto"} { + variable id_counter + set spec_id "autoid_[incr id_counter]" + } + + + set opt_checks_defaults [tcl::dict::remove $optspec_defaults -type -default -multiple -strip_ansi -validate_without_ansi -allow_ansi] ;#leave things like -range -minlen + set val_checks_defaults [tcl::dict::remove $valspec_defaults -type -default -multiple -strip_ansi -validate_without_ansi -allow_ansi] ;#leave things like -range -minlen + + set result [tcl::dict::create\ + id $spec_id\ + arg_info $arg_info\ + arg_checks $arg_checks\ + opt_defaults $opt_defaults\ + opt_required $opt_required\ + opt_names $opt_names\ + opt_any $opt_any\ + opt_solos $opt_solos\ + optspec_defaults $optspec_defaults\ + opt_checks_defaults $opt_checks_defaults\ + val_defaults $val_defaults\ + val_required $val_required\ + val_names $val_names\ + val_min $val_min\ + val_max $val_max\ + valspec_defaults $valspec_defaults\ + val_checks_defaults $val_checks_defaults\ + proc_info $proc_info\ + ] + tcl::dict::set argspec_cache $cache_key $result + tcl::dict::set argspecs $spec_id $optionspecs + #puts "xxx:$result" + return $result + } + + proc get_spec {id} { + variable argspecs + if {[tcl::dict::exists $argspecs $id]} { + return [tcl::dict::get $argspecs $id] + } + return + } + proc get_spec_ids {{match *}} { + variable argspecs + return [tcl::dict::keys $argspecs $match] + } + + #for use within get_dict only + #This mechanism gets less-than-useful results for oo methods + #e.g {$obj} + proc Get_caller {} { + set cmdinfo [tcl::dict::get [tcl::info::frame -3] cmd] + #puts "-->$cmdinfo" + #puts "-->[tcl::info::frame -3]" + set caller [regexp -inline {\S+} $cmdinfo] + if {$caller eq "namespace"} { + set cmdinfo "punk::args::get_dict called from namespace" + } + return $cmdinfo + } + + proc arg_error {msg spec_dict {badarg ""}} { + # use basic colours here to support terminals without extended colours + #todo - add checks column (e.g -minlen -maxlen) + set errmsg $msg + if {![catch {package require textblock}]} { + if {[catch { + append errmsg \n + set procname [punk::lib::dict_getdef $spec_dict proc_info -name ""] + set prochelp [punk::lib::dict_getdef $spec_dict proc_info -help ""] + + #set t [textblock::class::table new [a+ web-yellow]Usage[a]] + set t [textblock::class::table new [a+ brightyellow]Usage[a]] + + set blank_header_col [list ""] + if {$procname ne ""} { + lappend blank_header_col "" + #set procname_display [a+ web-white]$procname[a] + set procname_display [a+ brightwhite]$procname[a] + } else { + set procname_display "" + } + if {$prochelp ne ""} { + lappend blank_header_col "" + #set prochelp_display [a+ web-white]$prochelp[a] + set prochelp_display [a+ brightwhite]$prochelp[a] + } else { + set prochelp_display "" + } + $t add_column -headers $blank_header_col -minwidth 3 + $t add_column -headers $blank_header_col + $t add_column -headers $blank_header_col + $t add_column -headers $blank_header_col + $t add_column -headers $blank_header_col + if {"$procname$prochelp" eq ""} { + $t configure_header 0 -values {Arg Type Default Multiple Help} + } elseif {$procname eq ""} { + $t configure_header 0 -colspans {1 4 0 0 0} -values [list Description: $prochelp_display] + $t configure_header 1 -values {Arg Type Default Multiple Help} + } elseif {$prochelp eq ""} { + $t configure_header 0 -colspans {1 4 0 0 0} -values [list PROC/METHOD: $procname_display] + $t configure_header 1 -values {Arg Type Default Multiple Help} + } else { + $t configure_header 0 -colspans {1 4 0 0 0} -values [list PROC/METHOD: $procname_display] + $t configure_header 1 -colspans {1 4 0 0 0} -values [list Description: $prochelp_display] + $t configure_header 2 -values {Arg Type Default Multiple Help} + } + + #set c_default [a+ web-white Web-limegreen] + set c_default [a+ brightwhite Brightgreen] + #set c_badarg [a+ web-crimson] + set c_badarg [a+ brightred] + #set greencheck [a+ web-limegreen]\u2713[a] + set greencheck [a+ brightgreen]\u2713[a] + + foreach arg [dict get $spec_dict opt_names] { + set arginfo [dict get $spec_dict arg_info $arg] + if {[dict exists $arginfo -default]} { + #set default $c_default[dict get $arginfo -default] + set default [dict get $arginfo -default] + } else { + set default "" + } + set help [punk::lib::dict_getdef $arginfo -help ""] + if {[dict exists $arginfo -choices]} { + if {$help ne ""} {append help \n} + append help "Choices: [dict get $arginfo -choices]" + } + if {[punk::lib::dict_getdef $arginfo -multiple 0]} { + set multiple $greencheck + } else { + set multiple "" + } + $t add_row [list $arg [dict get $arginfo -type] $default $multiple $help] + if {$arg eq $badarg} { + $t configure_row [expr {[$t row_count]-1}] -ansibase $c_badarg + } + } + foreach arg [dict get $spec_dict val_names] { + set arginfo [dict get $spec_dict arg_info $arg] + if {[dict exists $arginfo -default]} { + set default [dict get $arginfo -default] + } else { + set default "" + } + set help [punk::lib::dict_getdef $arginfo -help ""] + if {[dict exists $arginfo -choices]} { + if {$help ne ""} {append help \n} + append help "Choices: [dict get $arginfo -choices]" + } + if {[punk::lib::dict_getdef $arginfo -multiple 0]} { + set multiple $greencheck + } else { + set multiple "" + } + $t add_row [list $arg [dict get $arginfo -type] $default $multiple $help] + if {$arg eq $badarg} { + $t configure_row [expr {[$t row_count]-1}] -ansibase $c_badarg + } + } + + + #$t configure -show_hseps 0 -show_header 1 -ansibase_body [a+ web-white] -ansibase_header [a+ brightyellow] -ansiborder_header [a+ brightyellow] -ansiborder_body [a+ brightyellow] + $t configure -show_hseps 0 -show_header 1 -ansibase_body [a+ brightwhite] -ansibase_header [a+ brightyellow] -ansiborder_header [a+ brightyellow] -ansiborder_body [a+ brightyellow] + $t configure -maxwidth 80 + append errmsg [$t print] + $t destroy + } errM]} { + catch {$t destroy} + append errmsg \n + append errmsg "(additional error in punk::args::arg_error when attempting to display usage)" \n + append errmsg "$errM" \n + append errmsg "$::errorInfo" + + } + } else { + #todo - something boring + } + #add PUNK to the tail end of the more usual -errorcode {TCL WRONGARGS} so we maintain reasonable compat with things looking for TCL WRONGARGS - but also differentiate it. + #Also,we're polite enough in the errorInfo, nothing wrong with a Clint Eastwood style errorCode ;) + return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg + } + + #todo - a version of get_dict that supports punk::lib::tstr templating + #rename get_dict + #provide ability to look up and reuse definitions from ids etc + # + + #generally we expect values to contain leading dashes only if -- specified. Otherwise no reliable way determine difference between bad flags and values + #If no eopts (--) specified we stop looking for opts at the first nondash encountered in a position we'd expect a dash - so without eopt, values could contain dashes - but not in first position after flags. + #only supports -flag val pairs, not solo options + #If an option is supplied multiple times - only the last value is used. + proc get_dict {optionspecs args} { + #*** !doctools + #[call [fun get_dict] [arg optionspecs] [arg rawargs] [opt {option value...}]] + #[para]Parse rawargs as a sequence of zero or more option-value pairs followed by zero or more values + #[para]Returns a dict of the form: opts values + #[para]ARGUMENTS: + #[list_begin arguments] + #[arg_def multiline-string optionspecs] + #[para] This a block of text with records delimited by newlines (lf or crlf) - but with multiline values allowed if properly quoted/braced + #[para]'info complete' is used to determine if a record spans multiple lines due to multiline values + #[para]Each optionspec line defining a flag must be of the form: + #[para]-optionname -key val -key2 val2... + #[para]where the valid keys for each option specification are: -default -type -range -choices -optional + #[para]Each optionspec line defining a positional argument is of the form: + #[para]argumentname -key val -ky2 val2... + #[para]where the valid keys for each option specification are: -default -type -range -choices + #[para]comment lines begining with # are ignored and can be placed anywhere except within a multiline value where it would become part of that value + #[para]lines beginning with *proc *opts or *values also take -key val pairs and can be used to set defaults and control settings. + #[para]*opts or *values lines can appear multiple times with defaults affecting flags/values that follow. + #[arg_def list rawargs] + #[para] This is a list of the arguments to parse. Usually it will be the $args value from the containing proc, + #but it could be a manually constructed list of values made for example from positional args defined in the proc. + #[list_end] + #[para] + + #consider line-processing example below for which we need info complete to determine record boundaries + #punk::args::get_dict { + # *opts + # -opt1 -default {} + # -opt2 -default { + # etc + # } + # *values -multiple 1 + #} $args + + if {[llength $args] == 0} { + set rawargs [list] + } elseif {[llength $args] ==1} { + set rawargs [lindex $args 0] ;#default tcl style + } else { + #todo - can we support tk style vals before flags? + #the aim is to allow a single call to validate standard leading proc positional args along with the $args val which may have flags in tk or tcl order + #by allowing all to be in a single call - we could use a -help (or similar erroring call) to trigger the spec-parsing from a wrapper function. + #this would be important in the case where the function to be wrapped has never been called - but the wrapper needs info about the downstream options + #we would like to avoid the ugliness of trying to parse a proc body to scrape the specification. + #we may still need to do a basic scan of the proc body to determine if it at least contains the string punk::args::get_dict - but that is slightly less odious. + error "unsupported number of arguments for punk::args::get_dict" + set inopt 0 + set k "" + set i 0 + foreach a $args { + switch -- $f { + -opts { + + } + -vals { + + } + -optvals { + #tk style + + } + -valopts { + #tcl style + set rawargs [lindex $args $i+1] + incr i + } + default { + + } + } + incr i + } + } + + + set argspecs [Get_argspecs $optionspecs] + tcl::dict::with argspecs {} ;#turn keys into vars + #puts "-arg_info->$arg_info" + set flagsreceived [list] ;#for checking if required flags satisfied + #secondary purpose: + #for -multple true, we need to ensure we can differentiate between a default value and a first of many that happens to match the default. + #-default value must not be appended to if argname not yet in flagsreceived + + + #todo: -minmultiple -maxmultiple ? + + set opts $opt_defaults + if {[set eopts [lsearch -exact $rawargs "--"]] >= 0} { + lappend flagsreceived -- + set values [lrange $rawargs $eopts+1 end] + set arglist [lrange $rawargs 0 $eopts-1] + set maxidx [expr {[llength $arglist]-1}] + for {set i 0} {$i <= $maxidx} {incr i} { + set a [lindex $arglist $i] + if {![tcl::string::match -* $a]} { + #we can't treat as first positional arg - as it comes before the eopt indicator -- + arg_error "punk::args::get_dict bad options for [Get_caller]. Expected flag (leading -) at position $i got:$rawargs" $argspecs + } + + if {![catch {tcl::prefix match -message "options for %caller%. Unexpected option" $opt_names $a } fullopt]} { + if {[tcl::dict::get $arg_info $fullopt -type] ne "none"} { + #non-solo + set flagval [lindex $arglist $i+1] + if {[dict get $arg_info $fullopt -multiple]} { + #don't lappend to default - we need to replace if there is a default + #review - what if user sets first value that happens to match a default? + if {$fullopt ni $flagsreceived && [tcl::dict::exists $opt_defaults $fullopt] && ([tcl::dict::get $opt_defaults $fullopt] eq [tcl::dict::get $opts $fullopt])} { + #first occurrence of this flag, whilst stored value matches default + tcl::dict::set opts $fullopt [list $flagval] + } else { + tcl::dict::lappend opts $fullopt $flagval + } + } else { + tcl::dict::set opts $fullopt $flagval + } + #incr i to skip flagval + if {[incr i] > $maxidx} { + arg_error "punk::args::get_dict bad options for [Get_caller]. No value supplied for last option $fullopt at index [expr {$i-1}] which is not marked with -type none" $argspecs $fullopt + } + } else { + #type none (solo-flag) + if {[tcl::dict::get $arg_info $fullopt -multiple]} { + if {[tcl::dict::get $opts $fullopt] == 0} { + #review - what if default at time opt was specified is not zero? we will end up with more items in the list than the number of times the flag was specified + tcl::dict::set opts $fullopt 1 + } else { + tcl::dict::lappend opts $fullopt 1 + } + } else { + tcl::dict::set opts $fullopt 1 + } + } + lappend flagsreceived $fullopt ;#dups ok + } else { + if {$opt_any} { + set newval [lindex $arglist $i+1] + #opt was unspecified but is allowed due to *opt -any 1 - 'adhoc/passthrough' option + tcl::dict::set arg_info $a $optspec_defaults ;#use default settings for unspecified opt + tcl::dict::set arg_checks $a $opt_checks_defaults + if {[tcl::dict::get $arg_info $a -type] ne "none"} { + if {[tcl::dict::get $arg_info $a -multiple]} { + tcl::dict::lappend opts $a $newval + } else { + tcl::dict::set opts $a $newval + } + lappend flagsreceived $a ;#adhoc flag as supplied + if {[incr i] > $maxidx} { + arg_error "punk::args::get_dict bad options for [Get_caller]. No value supplied for last adhoc option $a at index [expr {$i-1}] which is not marked with -type none" $argspecs $a + } + } else { + #review -we can't provide a way to allow unspecified -type none flags through reliably/unambiguously unless all remaining unspecified options are -type none + if {[tcl::dict::get $arg_info $a -multiple]} { + if {![tcl::dict::exists $opts $a]} { + tcl::dict::set opts $a 1 + } else { + tcl::dict::lappend opts $a 1 + } + } else { + tcl::dict::set opts $a 1 + } + } + } else { + #delay Get_caller so only called in the unhappy path + set errmsg [tcl::string::map [list %caller% [Get_caller]] $fullopt] + arg_error $errmsg $argspecs $fullopt + } + } + + } + } else { + if {[lsearch $rawargs -*] >= 0} { + #no -- end of opts indicator + #to support option values with leading dash e.g -offset -1 , we can't just use the last flagindex to determine start of positional args. + #we break on first non-flag looking argument that isn't in an option's value position and use that index as the division. + #The caller should use -- if the first positional arg is likely or has the potential to start with a dash. + + set maxidx [expr {[llength $rawargs]-1}] + for {set i 0} {$i <= $maxidx} {incr i} { + set a [lindex $rawargs $i] + #we can automatically rule out arguments containing whitespace from the set of simple flags beginning with a dash + #This helps for example when first value is a dict or list in which the first item happens to begin with a dash + #explicit -- still safer in many cases, but this is a reasonable and fast enough test + if {![tcl::string::match -* $a] || [regexp {\s+} $a]} { + #assume beginning of positional args + incr i -1 + break + } + + if {![catch {tcl::prefix match -message "options for %caller%. Unexpected option" $opt_names $a } fullopt]} { + if {[tcl::dict::get $arg_info $fullopt -type] ne "none"} { + #non-solo + set flagval [lindex $rawargs $i+1] + if {[dict get $arg_info $fullopt -multiple]} { + #don't lappend to default - we need to replace if there is a default + #review - what if user sets first value that happens to match a default? + if {$fullopt ni $flagsreceived && [tcl::dict::exists $opt_defaults $fullopt] && ([tcl::dict::get $opt_defaults $fullopt] eq [tcl::dict::get $opts $fullopt])} { + #first occurrence of this flag, whilst stored value matches default + tcl::dict::set opts $fullopt [list $flagval] + } else { + tcl::dict::lappend opts $fullopt $flagval + } + } else { + tcl::dict::set opts $fullopt $flagval + } + #incr i to skip flagval + if {[incr i] > $maxidx} { + arg_error "punk::args::get_dict bad options for [Get_caller]. No value supplied for last option $fullopt at index [expr {$i-1}] which is not marked with -type none" $argspecs $fullopt + } + } else { + #type none (solo-flag) + if {[tcl::dict::get $arg_info $fullopt -multiple]} { + if {[tcl::dict::get $opts $fullopt] == 0} { + #review - what if default at time opt was specified is not zero? we will end up with more items in the list than the number of times the flag was specified + tcl::dict::set opts $fullopt 1 + } else { + tcl::dict::lappend opts $fullopt 1 + } + } else { + tcl::dict::set opts $fullopt 1 + } + } + lappend flagsreceived $fullopt ;#dups ok + } else { + if {$opt_any} { + set newval [lindex $rawargs $i+1] + #opt was unspecified but is allowed due to *opt -any 1 - 'adhoc/passthrough' option + tcl::dict::set arg_info $a $optspec_defaults ;#use default settings for unspecified opt + tcl::dict::set arg_checks $a $opt_checks_defaults + if {[tcl::dict::get $arg_info $a -type] ne "none"} { + if {[tcl::dict::get $arg_info $a -multiple]} { + tcl::dict::lappend opts $a $newval + } else { + tcl::dict::set opts $a $newval + } + lappend flagsreceived $a ;#adhoc flag as supplied + if {[incr i] > $maxidx} { + arg_error "punk::args::get_dict bad options for [Get_caller]. No value supplied for last adhoc option $a at index [expr {$i-1}] which is not marked with -type none" $argspecs $a + } + } else { + #review -we can't provide a way to allow unspecified -type none flags through reliably/unambiguously unless all remaining unspecified options are -type none + if {[tcl::dict::get $arg_info $a -multiple]} { + if {![tcl::dict::exists $opts $a]} { + tcl::dict::set opts $a 1 + } else { + tcl::dict::lappend opts $a 1 + } + } else { + tcl::dict::set opts $a 1 + } + } + } else { + #delay Get_caller so only called in the unhappy path + set errmsg [tcl::string::map [list %caller% [Get_caller]] $fullopt] + arg_error $errmsg $argspecs $fullopt + } + } + } + set arglist [lrange $rawargs 0 $i] + set values [lrange $rawargs $i+1 end] + #puts "$i--->arglist:$arglist" + #puts "$i--->values:$values" + } else { + set values $rawargs ;#no -flags detected + set arglist [list] + } + } + set validx 0 + set in_multiple "" + set valnames_received [list] + set values_dict $val_defaults + set num_values [llength $values] + foreach valname $val_names val $values { + if {$validx+1 > $num_values} { + break + } + if {$valname ne ""} { + if {[tcl::dict::get $arg_info $valname -multiple]} { + if {[tcl::dict::exists $val_defaults $valname] && ([tcl::dict::get $val_defaults $valname] eq [tcl::dict::get $values_dict $valname])} { + #current stored val equals defined default - don't include default in the list we build up + tcl::dict::set values_dict $valname [list $val] ;#important to treat first element as a list + } else { + tcl::dict::lappend values_dict $valname $val + } + set in_multiple $valname + } else { + tcl::dict::set values_dict $valname $val + } + lappend valnames_received $valname + } else { + if {$in_multiple ne ""} { + tcl::dict::lappend values_dict $in_multiple $val + #name already seen + } else { + tcl::dict::set values_dict $validx $val + tcl::dict::set arg_info $validx $valspec_defaults + tcl::dict::set arg_checks $validx $val_checks_defaults + lappend valnames_received $validx + } + } + incr validx + } + + if {$val_max == -1} { + #only check min + if {$num_values < $val_min} { + arg_error "bad number of trailing values for [Get_caller]. Got $num_values values. Expected at least $val_min" $argspecs + } + } else { + if {$num_values < $val_min || $num_values > $val_max} { + if {$val_min == $val_max} { + arg_error "bad number of trailing values for [Get_caller]. Got $num_values values. Expected exactly $val_min" $argspecs + } else { + arg_error "bad number of trailing values for [Get_caller]. Got $num_values values. Expected between $val_min and $val_max inclusive" $argspecs + } + } + } + + #assertion - opts keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options + + + #opts explicitly marked as -optional 0 must be present - regardless of -anyopts (which allows us to ignore additional opts to pass on to next call) + #however - if -anyopts is true, there is a risk we will treat a shortened option name as matching our default - when it was intended for the next call + #We will always require exact matches for all required opts to avoid this risk, even though an ultimately-called function may not require the full-length option-name REVIEW + #The aim is to allow a wrapper function to specify a default value for an option (and/or other changes/restrictions to a particular option or two) - without having to re-specify all the options for the underlying function. + #without full respecification in the wrapper - we can't know that a supplied prefix is unambiguous at the next level + #For this reason we need to pass on full-length opts for any defined options in the wrapper even if anyopts is true + + #safe interp note - cannot avoid struct::set difference ensemble as it could be c or tcl implementation and we don't have an option to call directly? + #example timing difference: + #struct::set difference {x} {a b} + #normal interp 0.18 u2 vs safe interp 9.4us + #if {[llength [set missing [struct::set difference $opt_required $flagsreceived]]]} { + # error "Required option missing for [Get_caller]. missing flags $missing are marked with -optional false - so must be present in full-length form" + #} + #if {[llength [set missing [struct::set difference $val_required $valnames_received]]]} { + # error "Required value missing for [Get_caller]. missing values $missing marked with -optional false - so must be present" + #} + #for now (2024-06) punk::lib::ldiff is a better compromise across normal/safe interps e.g 0.7/0.8us + if {[llength [set missing [punk::lib::ldiff $opt_required $flagsreceived]]]} { + arg_error "Required option missing for [Get_caller]. missing flags: '$missing' are marked with -optional false - so must be present in full-length form" $argspecs + } + if {[llength [set missing [punk::lib::ldiff $val_required $valnames_received]]]} { + arg_error "Required value missing for [Get_caller]. missing values: '$missing' marked with -optional false - so must be present" $argspecs + } + + + #todo - truncate/summarize values in error messages + + #todo - allow defaults outside of choices/ranges + + #check types,ranges,choices + set opts_and_values [tcl::dict::merge $opts $values_dict] + #set combined_defaults [tcl::dict::merge $val_defaults $opt_defaults] ;#can be no key collisions - we don't allow a value key beginning with dash - opt names must begin with dash + #puts "---opts_and_values:$opts_and_values" + #puts "---arg_info:$arg_info" + tcl::dict::for {argname v} $opts_and_values { + set thisarg [tcl::dict::get $arg_info $argname] + #set thisarg_keys [tcl::dict::keys $thisarg] + set thisarg_checks [tcl::dict::get $arg_checks $argname] + set is_multiple [tcl::dict::get $thisarg -multiple] + set is_allow_ansi [tcl::dict::get $thisarg -allow_ansi] + set is_validate_without_ansi [tcl::dict::get $thisarg -validate_without_ansi] + set is_strip_ansi [tcl::dict::get $thisarg -strip_ansi] + set has_default [tcl::dict::exists $thisarg -default] + if {$has_default} { + set defaultval [tcl::dict::get $thisarg -default] + } + set type [tcl::dict::get $thisarg -type] + set has_choices [tcl::dict::exists $thisarg -choices] + + if {$is_multiple} { + set vlist $v + } else { + set vlist [list $v] + } + if {!$is_allow_ansi} { + #allow_ansi 0 + package require punk::ansi + #do not run ta::detect on a list + foreach e $vlist { + if {[punk::ansi::ta::detect $e]} { + error "Option $argname for [Get_caller] contains ansi - but -allow_ansi is false. Received: '$e'" + } + } + } + if {$is_validate_without_ansi} { + #validate_without_ansi 1 + package require punk::ansi + set vlist_check [list] + foreach e $vlist { + lappend vlist_check [punk::ansi::ansistrip $e] + } + } else { + #validate_without_ansi 0 + set vlist_check $vlist + } + + set is_default 0 + if {$has_default} { + foreach e_check $vlist_check { + if {$e_check eq $defaultval} { + incr is_default + } + } + if {$is_default eq [llength $vlist]} { + set is_default 1 + } else { + #important to set 0 here too e.g if only one element of many matches default + set is_default 0 + } + } + #puts "argname:$argname v:$v is_default:$is_default" + #we want defaults to pass through - even if they don't pass the checks that would be required for a specified value + #If the caller manually specified a value that happens to match the default - we don't detect that as any different from an unspecified value - Review. + #arguments that are at their default are not subject to type and other checks + if {$is_default == 0} { + switch -- $type { + any {} + list { + foreach e_check $vlist_check { + if {![tcl::string::is list -strict $e_check]} { + arg_error "Option $argname for [Get_caller] requires type 'list'. Received: '$e_check'" $argspecs $argname + } + if {[tcl::dict::size $thisarg_checks]} { + tcl::dict::for {checkopt checkval} $thisarg_checks { + switch -- $checkopt { + -minlen { + # -1 for disable is as good as zero + if {[llength $e_check] < $checkval} { + arg_error "Option $argname for [Get_caller] requires list with -minlen $checkval. Received len:[llength $e_check] value:'$e_check'" $argspecs $argname + } + } + -maxlen { + if {$checkval ne "-1"} { + if {[llength $e_check] > $checkval} { + arg_error "Option $argname for [Get_caller] requires list with -maxlen $checkval. Received len:[llength $e_check] value:'$e_check'" $argspecs $argname + } + } + } + } + } + } + } + } + string { + if {[tcl::dict::size $thisarg_checks]} { + foreach e_check $vlist_check { + #safe jumptable test + #dict for {checkopt checkval} $thisarg_checks {} + tcl::dict::for {checkopt checkval} $thisarg_checks { + switch -- $checkopt { + -minlen { + # -1 for disable is as good as zero + if {[tcl::string::length $e_check] < $checkval} { + arg_error "Option $argname for [Get_caller] requires string with -minlen $checkval. Received len:[tcl::string::length $e_check] value:'$e_check'" $argspecs $argname + } + } + -maxlen { + if {$checkval ne "-1"} { + if {[tcl::string::length $e_check] > $checkval} { + arg_error "Option $argname for [Get_caller] requires string with -maxlen $checkval. Received len:[tcl::string::length $e_check] value:'$e_check'" $argspecs $argname + } + } + } + } + } + } + } + } + ansistring { + package require ansi + } + int { + if {[tcl::dict::exists $thisarg -range]} { + lassign [tcl::dict::get $thisarg -range] low high + foreach e $vlist e_check $vlist_check { + if {![tcl::string::is integer -strict $e_check]} { + arg_error "Option $argname for [Get_caller] requires type 'integer'. Received: '$e'" $argspecs $argname + } + if {$e_check < $low || $e_check > $high} { + arg_error "Option $argname for [Get_caller] must be integer between $low and $high. Received: '$e'" $argspecs $argname + } + } + } else { + foreach e_check $vlist_check { + if {![tcl::string::is integer -strict $e_check]} { + arg_error "Option $argname for [Get_caller] requires type 'integer'. Received: '$e_check'" $argspecs $argname + } + } + } + } + double { + foreach e $vlist e_check $vlist_check { + if {![tcl::string::is double -strict $e_check]} { + error "Option $argname for [Get_caller] requires type 'double'. Received: '$e'" + } + if {[tcl::dict::size $thisarg_checks]} { + #safe jumptable test + #dict for {checkopt checkval} $thisarg_checks {} + tcl::dict::for {checkopt checkval} $thisarg_checks { + switch -- $checkopt { + -range { + #todo - small-value double comparisons with error-margin? review + lassign $checkval low high + if {$e_check < $low || $e_check > $high} { + arg_error "Option $argname for [Get_caller] must be between $low and $high. Received: '$e'" $argspecs $argname + } + } + } + } + } + } + } + bool { + foreach e_check $vlist_check { + if {![tcl::string::is boolean -strict $e_check]} { + arg_error "Option $argname for [Get_caller] requires type 'boolean'. Received: '$e_check'" $argspecs $argname + } + } + } + dict { + foreach e_check $vlist_check { + if {[llength $e_check] %2 != 0} { + arg_error "Option $argname for [Get_caller] requires type 'dict' - must be key value pairs. Received: '$e_check'" $argspecs $argname + } + if {[tcl::dict::size $thisarg_checks]} { + tcl::dict::for {checkopt checkval} $thisarg_checks { + switch -- $checkopt { + -minlen { + # -1 for disable is as good as zero + if {[tcl::dict::size $e_check] < $checkval} { + arg_error "Option $argname for [Get_caller] requires dict with -minlen $checkval. Received dict size:[dict size $e_check] value:'$e_check'" $argspecs $argname + } + } + -maxlen { + if {$checkval ne "-1"} { + if {[tcl::dict::size $e_check] > $checkval} { + arg_error "Option $argname for [Get_caller] requires dict with -maxlen $checkval. Received dict size:[dict size $e_check] value:'$e_check'" $argspecs $argname + } + } + } + } + } + } + } + } + alnum - + alpha - + ascii - + control - + digit - + graph - + lower - + print - + punct - + space - + upper - + wordchar - + xdigit { + foreach e $vlist e_check $vlist_check { + if {![tcl::string::is $type $e_check]} { + arg_error "Option $argname for [Get_caller] requires type '$type'. Received: '$e'" $argspecs $argname + } + } + } + file - + directory - + existingfile - + existingdirectory { + foreach e $vlist e_check $vlist_check { + if {!([tcl::string::length $e_check]>0 && ![regexp {[\"*?<>\;]} $e_check])} { + #what about special file names e.g on windows NUL ? + arg_error "Option $argname for [Get_caller] requires type '$type'. Received: '$e' which doesn't look like it could be a file or directory" $argspecs $argname + } + } + if {$type eq "existingfile"} { + foreach e $vlist e_check $vlist_check { + if {![file exists $e_check]} { + arg_error "Option $argname for [Get_caller] requires type '$type'. Received: '$e' which is not an existing file" $argspecs $argname + } + } + } elseif {$type eq "existingdirectory"} { + foreach e $vlist e_check $vlist_check { + if {![file isdirectory $e_check]} { + arg_error "Option $argname for [Get_caller] requires type '$type'. Received: '$e' which is not an existing directory" $argspecs $argname + } + } + } + } + char { + foreach e $vlist e_check $vlist_check { + if {[tcl::string::length $e_check] != 1} { + arg_error "Option $argname for [Get_caller] requires type 'character'. Received: '$e' which is not a single character" $argspecs $argname + } + } + } + } + if {$has_choices} { + #todo -choicelabels + set choices [tcl::dict::get $thisarg -choices] + set nocase [tcl::dict::get $thisarg -nocase] + foreach e $vlist e_check $vlist_check { + if {$nocase} { + set casemsg "(case insensitive)" + set choices_test [tcl::string::tolower $choices] + set v_test [tcl::string::tolower $e_check] + } else { + set casemsg "(case sensitive)" + set v_test $e_check + set choices_test $choices + } + if {$v_test ni $choices_test} { + arg_error "Option $argname for [Get_caller] must be one of the listed values $choices $casemsg. Received: '$e'" $argspecs $argname + } + } + } + } + if {$is_strip_ansi} { + set stripped_list [lmap e $vlist {punk::ansi::ansistrip $e}] ;#no faster or slower, but more concise than foreach + if {[tcl::dict::get $thisarg -multiple]} { + if {[tcl::dict::get $thisarg -ARGTYPE] eq "option"} { + tcl::dict::set opts $argname $stripped_list + } else { + tcl::dict::set values_dict $argname $stripped_list + } + } else { + if {[tcl::dict::get $thisarg -ARGTYPE] eq "option"} { + tcl::dict::set opts $argname [lindex $stripped_list 0] + } else { + tcl::dict::set values_dict [lindex $stripped_list 0] + } + } + } + } + + #maintain order of opts $opts values $values as caller may use lassign. + return [tcl::dict::create opts $opts values $values_dict] + } + + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::args ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::args::lib { + tcl::namespace::export * + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::args::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {option value...}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::args::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::args::system { + #*** !doctools + #[subsection {Namespace punk::args::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::args [tcl::namespace::eval punk::args { + variable pkg punk::args + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/assertion-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/assertion-0.1.0.tm new file mode 100644 index 00000000..bee5a415 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/assertion-0.1.0.tm @@ -0,0 +1,424 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::assertion 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::assertion 0 0.1.0] +#[copyright "2024"] +#[titledesc {assertion alternative to control::assert}] [comment {-- Name section and table of contents description --}] +#[moddesc {per-namespace assertions with }] [comment {-- Description at end of page heading --}] +#[require punk::assertion] +#[keywords module assertion assert debug] +#[description] +#[para] The punk::assertion library has the same semantics as Tcllib's control::assert library for the assert command itself. +#[para] The main difference is the way in which assert is enabled/disabled in namespaces. +#[para] Due to commands such as 'namespace path' - the assert command could be available in arbitrary namespaces unrelated by tree structure to namespaces where assert has been directly imported. +#[para] punk::assertion::active 0|1 allows activating and deactivating assertions in any namespace where the assert command is available - but only affecting the activations state of the namespace in which it is called. +#[para] If such a non-primary assertion namespace never had active set to 0 or 1 - then it will activate/deactivate when the namespace corresponding to the found assert command (primary) is activated/deactivated. +#[para] Once marked active or inactive - such a non-primary namespace will no longer follow the primary + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::assertion +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::assertion +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::assertion::class { + #*** !doctools + #[subsection {Namespace punk::assertion::class}] + #[para] class definitions + if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#keep 2 namespaces for assertActive and assertInactive so there is introspection available via namespace origin +tcl::namespace::eval punk::assertion::primary { + #tcl::namespace::export {[a-z]*} + tcl::namespace::export assertActive assertInactive + proc assertActive {expr args} { + + set code [catch {uplevel 1 [list expr $expr]} res] + if {$code} { + return -code $code $res + } + if {![tcl::string::is boolean -strict $res]} { + return -code error "invalid boolean expression: $expr" + } + + if {$res} {return} + + if {[llength $args]} { + #set msg "[join $args]" + set msg "$args punk::assertion failed expr $expr" + } else { + set msg "punk::assertion failed expr $expr" ;#give a clue in the default msg about which assert lib is in use + } + + upvar ::punk::assertion::CallbackCmd CallbackCmd + # Might want to catch this + tcl::namespace::eval :: $CallbackCmd [list $msg] + } + proc assertInactive args {} + +} +tcl::namespace::eval punk::assertion::secondary { + tcl::namespace::export * + #we need to actually define these procs here, (not import then re-export) - or namespace origin will report the original source namespace - which isn't what we want. + proc assertActive {expr args} [tcl::info::body ::punk::assertion::primary::assertActive] + proc assertInactive args {} +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::assertion { + variable CallbackCmd [list return -code error] + + #puts --------AAA + #*very* slow in safe interp - why? + #tcl::namespace::import ::punk::assertion::primary::assertActive + + proc do_ns_import {} { + uplevel 1 [list tcl::namespace::import ::punk::assertion::primary::assertActive] + } + do_ns_import + #puts --------BBB + rename assertActive assert + +} + + +tcl::namespace::eval punk::assertion { + tcl::namespace::export * + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::assertion}] + #[para] Core API functions for punk::assertion + #[list_begin definitions] + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #like tcllib's control::assert - we are limited to the same callback for all namespaces. + #review - a per namespace - or per assert command callback may be tricky to do performantly. + #Would probably involve rewriting the proc body - otherwise we have a runtime penalty in the assert of looking it up. + proc callback {args} { + #set nscaller [uplevel 1 [list namespace current]] + #set which_assert [namespace eval $nscaller {namespace which assert}] + + upvar ::punk::assertion::CallbackCmd cb + set n [llength $args] + if {$n > 1} { + return -code error "wrong # args: should be\ + \"[lindex [tcl::info::level 0] 0] ?command?\"" + } + if {$n} { + set cb [lindex $args 0] + return + } + return $cb + } + + proc active {{on_off ""}} { + set nscaller [uplevel 1 [list tcl::namespace::current]] + set which_assert [tcl::namespace::eval $nscaller {tcl::namespace::which assert}] + #puts "nscaller:'$nscaller'" + #puts "which_assert: $which_assert" + + if {$on_off eq ""} { + if {$which_assert eq ""} {return 0} + set assertorigin [tcl::namespace::origin $which_assert] + #puts "ns which assert: $which_assert" + #puts "ns origin assert: $assertorigin" + return [expr {"assertActive" eq [tcl::namespace::tail $assertorigin]}] + } + if {![tcl::string::is boolean -strict $on_off]} { + error "invalid boolean value : $on_off" + } else { + set info_command [tcl::namespace::eval $nscaller {tcl::info::commands assert}] + if {$on_off} { + #Enable it in calling namespace + if {"assert" eq $info_command} { + #There is an assert command reachable - due to namespace path etc, it could be in another namespace entirely - (not necessarily in an ancestor namespace of the namespace's tree structure) + if {$which_assert eq [punk::assertion::system::nsjoin ${nscaller} assert]} { + tcl::namespace::eval $nscaller { + set assertorigin [tcl::namespace::origin assert] + set assertorigin_ns [punk::assertion::system::nsprefix $assertorigin] + switch -- $assertorigin_ns { + ::punk::assertion { + #original import - switch to primary origin + rename assert {} + tcl::namespace::import ::punk::assertion::primary::assertActive + rename assertActive assert + } + ::punk::assertion::primary - ::punk::assertion::secondary { + #keep using from same origin ns + rename assert {} + tcl::namespace::import ${assertorigin_ns}::assertActive + rename assertActive assert + } + default { + error "The assert command in this namespace is not from punk::assertion package. Use the enable mechanism from the package associated with $assertorigin or remove the existing assert command and namespace import punk::assertion::assert" + } + } + } + return 1 + } else { + #assert is available, but isn't in the calling namespace - we should enable it in a way that is distinguishable from case where assert was explicitly imported to this namespace + tcl::namespace::eval $nscaller { + set assertorigin [tcl::namespace::origin assert] + if {[tcl::string::match ::punk::assertion::* $assertorigin]} { + tcl::namespace::import ::punk::assertion::secondary::assertActive + rename assertActive assert + } else { + error "The reachable assert command at '$which_assert' is not from punk::assertion package. Import punk::assertion::assert - or use the enable mechanism from the package associated with $assertorigin" + } + } + return 1 + } + + } else { + #no assert command reachable + puts stderr "no assert command visible from namespace '$nscaller' - use: namespace import ::punk::assertion::assert" + return 0 + } + } else { + #Disable + if {"assert" eq $info_command} { + if {$which_assert eq [punk::assertion::system::nsjoin ${nscaller} assert]} { + #assert is present in callers NS + tcl::namespace::eval $nscaller { + set assertorigin [tcl::namespace::origin assert] + set assertorigin_ns [punk::assertion::system::nsprefix $assertorigin] + switch -glob -- $assertorigin_ns { + ::punk::assertion { + #original import + rename assert {} + tcl::namespace::import punk::assertion::primary::assertInactive + rename assertInactive assert + } + ::punk::assertion::primary - ::punk::assertion::secondary { + #keep using from same origin ns + rename assert {} + tcl::namespace::import ${assertorigin_ns}::assertInactive + rename assertInactive assert + } + default { + error "The assert command in this namespace is not from punk::assertion package. Use the disable mechanism from the package associated with $assertorigin or remove the existing assert command and namespace import punk::assertion::assert" + } + } + } + return 0 + } else { + #assert not present in callers NS - first install of secondary (if assert is from punk::assertion::*) + tcl::namespace::eval $nscaller { + set assertorigin [tcl::namespace::origin assert] + set assertorigin_ns [punk::assertion::system::nsprefix $assertorigin] + if {[tcl::string::match ::punk::assertion::* $assertorigin]} { + tcl::namespace::import ::punk::assertion::secondary::assertInactive + rename assertInactive assert + } else { + error "The reachable assert command at '$which_assert' is not from punk::assertion package. Import punk::assertion::assert - or use the enable mechanism from the package associated with $assertorigin" + } + } + return 0 + } + } else { + #no assert command reachable + #If caller is using assert in this namespace - they should have imported it, or ensured it was reachable via namespace path + puts stderr "no assert command visible from namespace '$nscaller' - use: namespace import ::punk::assertion::assert" + return 0 + } + } + } + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::assertion ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::assertion::lib { + tcl::namespace::export * + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::assertion::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::assertion::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::assertion::system { + #*** !doctools + #[subsection {Namespace punk::assertion::system}] + #[para] Internal functions that are not part of the API + + #Maintenance - snarfed from punk::ns to reduce dependencies - punk::ns::nsprefix is the master version + #nsprefix/nstail are string functions - they do not concern themselves with what namespaces are present in the system + proc nsprefix {{nspath {}}} { + #normalize the common case of :::: + set nspath [tcl::string::map [list :::: ::] $nspath] + set rawprefix [tcl::string::range $nspath 0 end-[tcl::string::length [nstail $nspath]]] + if {$rawprefix eq "::"} { + return $rawprefix + } else { + if {[tcl::string::match *:: $rawprefix]} { + return [tcl::string::range $rawprefix 0 end-2] + } else { + return $rawprefix + } + #return [tcl::string::trimright $rawprefix :] + } + } + #see also punk::ns - keep in sync + proc nstail {nspath args} { + #normalize the common case of :::: + set nspath [tcl::string::map [list :::: ::] $nspath] + set mapped [tcl::string::map [list :: \u0FFF] $nspath] + set parts [split $mapped \u0FFF] + + set defaults [list -strict 0] + set opts [tcl::dict::merge $defaults $args] + set strict [tcl::dict::get $opts -strict] + + if {$strict} { + foreach p $parts { + if {[tcl::string::match :* $p]} { + error "nstail unpaired colon ':' in $nspath" + } + } + } + #e.g ::x::y:::z should return ":z" despite it being a bad idea for a command name. + return [lindex $parts end] + } + proc nsjoin {prefix name} { + if {[tcl::string::match ::* $name]} { + if {"$prefix" ne ""} { + error "nsjoin: won't join non-empty prefix to absolute namespace path '$name'" + } + return $name + } + if {"$prefix" eq "::"} { + return ::$name + } + #if {"$name" eq ""} { + # return $prefix + #} + #nsjoin ::x::y "" should return ::x::y:: - this is the correct fully qualified form used to call a command that is the empty string + return ${prefix}::$name + } + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::assertion [tcl::namespace::eval punk::assertion { + variable pkg punk::assertion + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap-0.1.0.tm new file mode 100644 index 00000000..68d3252e --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap-0.1.0.tm @@ -0,0 +1,696 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::cap 0.1.0 +# Meta platform tcl +# Meta description pkg capability register +# Meta license BSD +# @@ Meta End + + +#*** !doctools +#[manpage_begin punkshell_module_punk::cap 0 0.1.0] +#[copyright "2023 JMNoble - BSD licensed"] +#[titledesc {capability provider and handler plugin system}] +#[moddesc {punk capabilities plugin system}] +#[require punk::cap] +#[description] +#[keywords module capability plugin] +#[section Overview] +#[para]punk::cap provides management of named capabilities and the provider packages and handler packages that implement a pluggable capability. +#[para]see also [uri https://core.tcl-lang.org/tcllib/doc/trunk/embedded/md/tcllib/files/modules/pluginmgr/pluginmgr.md {tcllib pluginmgr}] for an alternative which uses safe interpreters +#[subsection Concepts] +#[para]A [term capability] may be something like providing a folder of files, or just a data dictionary, and/or an API +# +#[para][term {capability handler}] - a package/namespace which may provide validation and standardised ways of looking up provider data +# registered (or not) using register_capabilityname +# +#[para][term {capability provider}] - a package which registers as providing one or more capablities. +#[para]registered using register_package +#the capabilitylist is a list of 2-element lists where the first element is the capabilityname and the second element is a (possibly empty) dict of data relevant to that capability +#A capabilityname may appear multiple times. ie a package may register that it provides the capability with multiple datasets. + + +#*** !doctools +#[section API] + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require oolib + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::cap { + variable pkgcapsdeclared [tcl::dict::create] + variable pkgcapsaccepted [tcl::dict::create] + variable caps [tcl::dict::create] + namespace eval class { + if {[tcl::info::commands ::punk::cap::class::interface_caphandler.registry] eq ""} { + #*** !doctools + #[subsection {Namespace punk::cap::class}] + #[para] class definitions + #[list_begin itemized] [comment {- punk::cap::class groupings -}] + # [item] + # [para] [emph {handler_classes}] + # [list_begin enumerated] + + oo::class create ::punk::cap::class::interface_caphandler.registry { + #*** !doctools + #[enum] CLASS [class interface_caphandler.registry] + #[list_begin definitions] + # [para] [emph METHODS] + method pkg_register {pkg capname capdict fullcapabilitylist} { + #*** !doctools + #[call class::interface_caphandler.registry [method pkg_register] [arg pkg] [arg capname] [arg capdict] [arg fullcapabilitylist]] + #handler may override and return 0 (indicating don't register)e.g if pkg capdict data wasn't valid + #overridden handler must be able to handle multiple calls for same pkg - but it may return 1 or 0 as it wishes. + return 1 ;#default to permit + } + method pkg_unregister {pkg} { + #*** !doctools + #[call class::interface_caphandler.registry [method pkg_unregister] [arg pkg]] + return ;#unregistration return is ignored - review + } + #*** !doctools + #[list_end] + } + + oo::class create ::punk::cap::class::interface_caphandler.sysapi { + #*** !doctools + #[enum] CLASS [class interface_caphandler.sysapi] + #[list_begin definitions] + # [para] [emph METHODS] + + + #*** !doctools + #[list_end] + } + + #*** !doctools + # [list_end] [comment {- end enumeration handler classes -}] + + #*** !doctools + # [item] + # [para] [emph {provider_classes}] + # [list_begin enumerated] + + #Provider classes + oo::class create ::punk::cap::class::interface_capprovider.registration { + #*** !doctools + # [enum] CLASS [class interface_cappprovider.registration] + # [para]Your provider package will need to instantiate this object under a sub-namespace called [namespace capsystem] within your package namespace. + # [para]If your package namespace is mypackages::providerpkg then the object command would be at mypackages::providerpkg::capsystem::capprovider.registration + # [para]Example code for your provider package to evaluate within its namespace: + # [example { + #namespace eval capsystem { + # if {[info commands capprovider.registration] eq ""} { + # punk::cap::class::interface_capprovider.registration create capprovider.registration + # oo::objdefine capprovider.registration { + # method get_declarations {} { + # set decls [list] + # lappend decls [list punk.templates {relpath ../templates}] + # lappend decls [list another_capability_name {somekey blah key2 etc}] + # return $decls + # } + # } + # } + #} + #}] + #[para] The above example declares that your package can be registered as a provider for the capabilities named 'punk.templates' and 'another_capability_name' + # [list_begin definitions] + # [para] [emph METHODS] + method get_declarations {} { + #*** + #[call class::interface_capprovider.registration [method get_declarations]] + #[para] This method must be overridden by your provider using oo::objdefine cappprovider.registration as in the example above. + # There must be at least one 2-element list in the result for the provider to be registerable. + #[para]The first element of the list is the capabilityname - which can be custom to your provider/handler packages - or a well-known name that other authors may use/implement. + #[para]The second element is a dictionary of keys specific to the capability being implemented. It may be empty if the any potential capability handlers for the named capability don't require registration data. + error "interface_capprovider.registration not implemented by provider" + } + #*** !doctools + # [list_end] + } + + oo::class create ::punk::cap::class::interface_capprovider.provider { + #*** !doctools + # [enum] CLASS [class interface_capprovider.provider] + # [para] Your provider package will need to instantiate this directly under it's own namespace with the command name of [emph {provider}] + # [example { + # namespace eval mypackages::providerpkg { + # punk::cap::class::interface_capprovider.provider create provider mypackages::providerpkg + # } + # }] + # [list_begin definitions] + # [para] [emph METHODS] + variable provider_pkg + variable registrationobj + constructor {providerpkg} { + #*** !doctools + #[call class::interface_capprovider.provider [method constructor] [arg providerpkg]] + variable provider_pkg + if {$providerpkg in {"" "::"}} { + error "interface_capprovider.provider constructor error. Invalid provider '$providerpkg'" + } + if {![namespace exists ::$providerpkg]} { + error "interface_capprovider.provider constructor error. Invalid provider '$providerpkg' - matching namespace not found" + } + + set registrationobj ::${providerpkg}::capsystem::capprovider.registration + if {[tcl::info::commands $registrationobj] eq ""} { + error "capprovider.provider constructor error. Missing capprovider.registration interface at '$obj' (command not found) interface_capprovider.regstration instantiation must precede interface_capprovider.provider" + } + + #review - what are we trying to achieve here? + set provider_pkg [tcl::string::trim $providerpkg ""] + } + method register {{capabilityname_glob *}} { + #*** !doctools + #[comment {- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---}] + #[call class::interface_capprovider.provider [method register] [opt capabilityname_glob]] + # + #[para]This is the mechanism by which a user of your provider package will register your package as a provider of the capability named. + # + #[para]A user of your provider may elect to register all your declared capabilities: + #[example { + # package require mypackages::providerpkg + # mypackages::providerpkg::provider register * + #}] + #[para] Or a specific capability may be registered: + #[example { + # package require mypackages::providerpkg + # mypackages::providerpkg::provider register another_capability_name + #}] + # + variable provider_pkg + set all_decls [$registrationobj get_declarations] + set register_decls [lsearch -all -inline -index 0 $all_decls $capabilityname_glob] + punk::cap::register_package $provider_pkg $register_decls + } + method capabilities {} { + #*** !doctools + #[comment {- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---}] + #[call class::interface_capprovider.provider [method capabilities]] + #[para] return a list of capabilities supported by this provider package + variable provider_pkg + variable registrationobj + + set capabilities [list] + set decls [$registrationobj get_declarations] + foreach decl $decls { + lassign $decl capname capdict + if {$capname ni $capabilities} { + lappend capabilities $capname + } + } + return $capabilities + } + #*** !doctools + # [list_end] [comment {- end class definitions -}] + } + #*** !doctools + # [list_end] [comment {- end enumeration provider_classes }] + #[list_end] [comment {- end itemized list punk::cap::class groupings -}] + } + } ;# end namespace class + + #*** !doctools + #[subsection {Namespace punk::cap}] + #[para] Main punk::cap API for client programs interested in using capability handler packages and associated (registered) provider packages + #[list_begin definitions] + + #Not all capability names have to be registered. + #A package registering as a provider using register_package can include capabilitynames in it's capabilitylist which have no associated handler. + #such unregistered capabilitynames may be used just to flag something, or have datamembers significant to callers cooperatively interested in that capname. + #we allow registering a capability with an empty handler (capnamespace) - but this means another handler could be registered later. + proc register_capabilityname {capname capnamespace} { + #puts stderr "REGISTER_CAPABILITYNAME $capname $capnamespace" + variable caps + variable pkgcapsdeclared + variable pkgcapsaccepted + if {$capnamespace ne ""} { + #normalize with leading :: in case caller passed in package name rather than fully qualified namespace + if {![tcl::string::match ::* $capnamespace]} { + set capnamespace ::$capnamespace + } + } + #allow register of existing capname iff there is no current handler + #as handlers can be used to validate during provider registration - ideally handlers should be registered before any pkgs call register_package + #we allow loading a handler later though - but will need to validate existing data from pkgs that have already registered as providers + if {[set hdlr [capability_get_handler $capname]] ne ""} { + puts stderr "register_capabilityname cannot register capability:$capname with handler:$capnamespace. There is already a registered handler:$hdlr" + return + } + #assertion: capnamespace may or may not be empty string, capname may or may not already exist in caps dict, caps $capname providers may have existing entries. + tcl::dict::set caps $capname handler $capnamespace + if {![tcl::dict::exists $caps $capname providers]} { + tcl::dict::set caps $capname providers [list] + } + if {[llength [set providers [tcl::dict::get $caps $capname providers]]]} { + #some provider(s) were in place before the handler was registered + if {[set capreg [punk::cap::capsystem::get_caphandler_registry $capname]] ne ""} { + foreach pkg $providers { + set fullcapabilitylist [tcl::dict::get $pkgcapsdeclared $pkg] + set capname_capabilitylist [lsearch -all -inline -index 0 $fullcapabilitylist $capname] + foreach capspec $capname_capabilitylist { + lassign $capspec cn capdict + #if {$cn ne $capname} { + # continue + #} + if {[catch {$capreg pkg_register $pkg $capdict $fullcapabilitylist} do_register]} { + puts stderr "punk::cap::register_capabilityname '$capname' '$capnamespace' failed to register provider package '$pkg' - possible error in handler or provider" + puts stderr "error message:" + puts stderr $do_register + set do_register 0 + } + + set list_accepted [tcl::dict::get $pkgcapsaccepted $pkg] + if {$do_register} { + if {$capspec ni $list_accepted} { + tcl::dict::lappend pkgcapsaccepted $pkg $capspec + } + } else { + set posn [lsearch $list_accepted $capspec] + if {$posn >=0} { + set list_accepted [lreplace $list_accepted $posn $posn] + tcl::dict::set pkgcapsaccepted $pkg $list_accepted + } + } + } + #check if any accepted for this cap and remove from caps as necessary + set count 0 + foreach accepted_capspec [tcl::dict::get $pkgcapsaccepted $pkg] { + if {[lindex $accepted_capspec 0] eq $capname} { + incr count + } + } + if {$count == 0} { + set pkgposn [lsearch $providers $pkg] + if {$pkgposn >= 0} { + set updated_providers [lreplace $providers $posn $posn] + tcl::dict::set caps $capname providers $updated_providers + } + } + } + + } + + } + } + proc capability_exists {capname} { + #*** !doctools + # [call [fun capability_exists] [arg capname]] + # Return a boolean indicating if the named capability exists (0|1) + variable caps + return [tcl::dict::exists $caps $capname] + } + proc capability_has_handler {capname} { + #*** !doctools + # [call [fun capability_has_handler] [arg capname]] + #Return a boolean indicating if the named capability has a handler package installed (0|1) + variable caps + return [expr {[tcl::dict::exists $caps $capname handler] && [tcl::dict::get $caps $capname handler] ne ""}] + } + proc capability_get_handler {capname} { + #*** !doctools + # [call [fun capability_get_handler] [arg capname]] + #Return the base namespace of the active handler package for the named capability. + #[para] The base namespace for a handler will always be the package name, but prefixed with :: + variable caps + if {[tcl::dict::exists $caps $capname]} { + return [tcl::dict::get $caps $capname handler] + } + return "" + } + proc call_handler {capname args} { + if {[set handler [capability_get_handler $capname]] eq ""} { + error "punk::cap::call_handler $capname $args - no handler registered for capability $capname" + } + set obj ${handler}::api_$capname + $obj [lindex $args 0] {*}[lrange $args 1 end] + } + proc get_providers {capname} { + variable caps + if {[tcl::dict::exists $caps $capname]} { + return [tcl::dict::get $caps $capname providers] + } + return [list] + } + + #register package with arbitrary capnames from capabilitylist + #The registered pkg is a module that provides some service to that capname. Possibly just data members or possibly an implementation of an API, that the capability will use. + proc register_package {pkg capabilitylist args} { + variable pkgcapsdeclared + variable pkgcapsaccepted + variable caps + set opts [dict create\ + -nowarnings false + ] + foreach {k v} $args { + switch -- $k { + -nowarnings { + tcl::dict::set opts $k $v + } + default { + error "Unrecognized option $k. Known options [tcl::dict::keys $opts]" + } + } + } + set warnings [expr {! [tcl::dict::get $opts -nowarnings]}] + + if {[tcl::string::match ::* $pkg]} { + set pkg [tcl::string::range $pkg 2 end] + } + if {[tcl::dict::exists $pkgcapsaccepted $pkg]} { + set pkg_already_accepted [tcl::dict::get $pkgcapsaccepted $pkg] + } else { + set pkg_already_accepted [list] + } + package require $pkg + set providerapi ::${pkg}::provider + if {[tcl::info::commands $providerapi] eq ""} { + error "register_package error. pkg '$pkg' doesn't seem to be a punk::cap capability provider (no object found at $providerapi)" + } + set defined_caps [$providerapi capabilities] + #for each capability + # - ensure 1st element is a single word + # - ensure that if 2nd element (capdict) is present - it is dict shaped + set capabilitylist_count [llength $capabilitylist] + set accepted_count 0 + set errorlist [list];# list of dicts + set warninglist [list] + foreach capspec $capabilitylist { + lassign $capspec capname capdict + + if {$warnings} { + if {$capname ni $defined_caps} { + puts stderr "WARNING: pkg '$pkg' doesn't declare support for capability '$capname'." + } + } + if {[llength $capname] !=1} { + puts stderr "register_package error. pkg: '$pkg' An entry in the capability list doesn't appear to have a single-word name. Problematic entry:'$capspec'" + set reason "First element of capspec not a single-word name" + lappend errorlist [tcl::dict::create msg $reason capspec $capspec] + continue + } + if {[expr {[llength $capdict] %2 != 0}]} { + puts stderr "register_package error. pkg:'$pkg' The second element for capname:'$capname' doesn't appear to be a valid dict. Problematic entry: '$capspec'" + set reason "The second element of the capspec isn't a valid dict" + lappend errorlist [tcl::dict::create msg $reason capspec $capspec] + continue + } + if {$capspec in $pkg_already_accepted} { + #review - multiple handlers? if so - will need to record which handler(s) accepted the capspec + if {$warnings} { + puts stderr "WARNING: register_package pkg $pkg already has capspec marked as accepted: $capspec" + } + lappend warninglist [tcl::dict::create msg "pkg $pkg already has this capspec marked as accepted" capspec $capspec] + continue + } + if {[tcl::dict::exists $caps $capname]} { + set cap_pkgs [tcl::dict::get $caps $capname providers] + } else { + dict set caps $capname [tcl::dict::create handler "" providers [list]] + set cap_pkgs [list] + } + #todo - if there's a caphandler - call it's init/validation callback for the pkg + set do_register 1 ;#default assumption unless vetoed by handler + if {[set capreg [punk::cap::capsystem::get_caphandler_registry $capname]] ne ""} { + #Note that the interface_caphandler.registry instance must be able to handle multiple calls for same pkg + set do_register [$capreg pkg_register $pkg $capname $capdict $capabilitylist] + } + if {$do_register} { + if {$pkg ni $cap_pkgs} { + lappend cap_pkgs $pkg + tcl::dict::set caps $capname providers $cap_pkgs + } + tcl::dict::lappend pkgcapsaccepted $pkg $capspec ;#if pkg is being registered prior to handler-registration - the handler may undo this entry + } + } + #another call to register_pkg with same pkg may have been made (most likely with different capname) so we must append - but check not already present + #dict lappend pkgcapsdeclared $pkg $capabilitylist + if {[tcl::dict::exists $pkgcapsdeclared $pkg]} { + #review - untested + set mergecapspecs [tcl::dict::get $pkgcapsdeclared $pkg] + foreach spec $capabilitylist { + if {$spec ni $mergecapspecs} { + lappend mergecapspecs $spec + } + } + tcl::dict::set pkgcapsdeclared $pkg $mergecapspecs + } else { + tcl::dict::set pkgcapsdeclared $pkg $capabilitylist + } + set resultdict [list num_capabilities $capabilitylist_count num_accepted $accepted_count] + if {[llength $errorlist]} { + tcl::dict::set resultdict errors $errorlist + } + if {[llength $warninglist]} { + tcl::dict::set resultdict warnings $warninglist + } + return $resultdict + } + + #todo! + proc unregister_package {pkg {capname *}} { + variable pkgcapsdeclared + variable caps + if {[string match ::* $pkg]} { + set pkg [string range $pkg 2 end] + } + if {[dict exists $pkgcapsdeclared $pkg]} { + #remove corresponding entries in caps + set capabilitylist [dict get $pkgcapsdeclared $pkg] + foreach c $capabilitylist { + set do_unregister 1 + lassign $c capname _capdict + set cap_info [dict get $caps $capname] + set pkglist [dict get $cap_info providers] + set posn [lsearch $pkglist $pkg] + if {$posn >= 0} { + if {[set capreg [punk::cap::capsystem::get_caphandler_registry $capname]] ne ""} { + #review + # it seems not useful to allow the callback to block this unregister action + #the pkg may have multiple datasets for each capname so callback will only be called for first dataset we encounter + #vetoing unregister would make this more complex for no particular advantage + #if per dataset deregistration required this should probably be a separate thing + $capreg pkg_unregister $pkg $capname + } + set pkglist [lreplace $pkglist $posn $posn] + dict set caps $capname providers $pkglist + } + } + #delete the main registration record + dict unset pkgcapsdeclared $pkg + } + } + + proc pkgcap {pkg {capsearch}} { + variable pkgcapsdeclared + variable pkgcapsaccepted + if {[string match ::* $pkg]} { + set pkg [string range $pkg 2 end] + } + if {[dict exists $pkgcapsdeclared $pkg]} { + set accepted "" + if {[dict exists $pkgcapsaccepted $pkg]} { + set accepted [lsearch -all -inline -glob -index 0 [dict get $pkgcapsaccepted $pkg] $capsearch] + } + return [dict create declared [lsearch -all -inline -glob -index 0 [dict get $pkgcapsdeclared $pkg] $capsearch] accepted $accepted] + } else { + return + } + } + proc pkgcaps {} { + variable pkgcapsdeclared + variable pkgcapsaccepted + set result [dict create] + foreach {pkg capsdeclared} $pkgcapsdeclared { + set accepted "" + if {[dict exists $pkgcapsaccepted $pkg]} { + set accepted [dict get $pkgcapsaccepted $pkg] + } + dict set result $pkg declared $capsdeclared + dict set result $pkg accepted $accepted + } + return $result + } + + proc capability {capname} { + variable caps + if {[dict exists $caps $capname]} { + return [dict get $caps $capname] + } + return "" + } + proc capabilities {{glob *}} { + variable caps + set capnames [lsort [dict keys $caps $glob]] + set cap_list [list] + foreach capname $capnames { + lappend cap_list [list $capname [dict get $caps $capname]] + } + return $cap_list + } + + proc capabilitynames {{glob *}} { + variable caps + return [lsort [dict keys $caps $glob]] + } + #return only those capnames which have at least one provider + proc capabilitynames_provided {{glob *}} { + variable caps + set keys [lsort [dict keys $caps $glob]] + set cap_list [list] + foreach k $keys { + if {[llength [dict get $caps $k providers]] > 0} { + lappend cap_list $k + } + } + return $cap_list + } + #*** !doctools + #[list_end] [comment {- end definitions for namespace punk::cap -}] + + namespace eval advanced { + #*** !doctools + #[subsection {Namespace punk::cap::advanced}] + #[para] punk::cap::advanced API. Functions here are generally not the preferred way to interact with punk::cap. + #[para] In some cases they may allow interaction in less safe ways or may allow use of features that are unavailable in the base namespace. + #[para] Some functions are here because they are only marginally or rarely useful, and they are here to keep the base API simple. + #[list_begin definitions] + + proc promote_provider {pkg} { + #*** !doctools + # [call advanced::[fun promote_provider] [arg pkg]] + #[para]Move the named provider package to the preferred end of the list (tail). + #[para]The active handler may or may not utilise this for preferencing. See documentation for the specific handler package to confirm. + #[para] + #[para] promote/demote doesn't always make a lot of sense .. should preferably be configurable per capapbility for multicap provider pkgs + #[para]The idea is to provide a crude way to preference/depreference packages independently of order the packages were loaded + #e.g a caller or cap-handler can ascribe some meaning to the order of the 'providers' key returned from punk::cap::capabilities + #[para]The order of providers will be the order the packages were loaded & registered + #[para]the naming: "promote vs demote" operates on a latest-package-in-list has higher preference assumption (matching last pkg loaded) + #[para]Each capability handler could and should implement specific preferencing methods within its own API if finer control needed. + #In some cases the preference/loading order may be inapplicable/irrelevant to a particular capability anyway. + #[para]As this is just a basic mechanism, which can't support independent per-cap preferencing for multi-cap packages - + # it only allows putting the pkgs to the head or tail of the lists. + #[para]Whether particular caps or users of caps do anything with this ordering is dependent on the cap-handler and/or calling code. + variable pkgcapsdeclared + variable caps + if {[string match ::* $pkg]} { + set pkg [string range $pkg 2 end] + } + if {![dict exists $pkgcapsdeclared $pkg]} { + error "punk::cap::promote_package error pkg'$pkg' not registered. Use register_package \$pkg first" + } + if {[dict size $pkgcapsdeclared] > 1} { + set pkginfo [dict get $pkgcapsdeclared $pkg] + #remove and re-add at end of dict + dict unset pkgcapsdeclared $pkg + dict set pkgcapsdeclared $pkg $pkginfo + dict for {cap cap_info} $caps { + set cap_pkgs [dict get $cap_info providers] + if {$pkg in $cap_pkgs} { + set posn [lsearch $cap_pkgs $pkg] + if {$posn >=0} { + #rewrite package list with pkg at tail of list for this capability + set cap_pkgs [lreplace $cap_pkgs $posn $posn] + lappend cap_pkgs $pkg + dict set caps $cap providers $cap_pkgs + } + } + } + } + } + proc demote_provider {pkg} { + #*** !doctools + # [call advanced::[fun demote_provider] [arg pkg]] + #[para]Move the named provider package to the preferred end of the list (tail). + #[para]The active handler may or may not utilise this for preferencing. See documentation for the specific handler package to confirm. + variable pkgcapsdeclared + variable caps + if {[string match ::* $pkg]} { + set pkg [string range $pkg 2 end] + } + if {![dict exists $pkgcapsdeclared $pkg]} { + error "punk::cap::promote_package error pkg'$pkg' not registered. Use register_package \$pkg first" + } + if {[dict size $pkgcapsdeclared] > 1} { + set pkginfo [dict get $pkgcapsdeclared $pkg] + #remove and re-add at start of dict + dict unset pkgcapsdeclared $pkg + dict set pkgcapsdeclared $pkg $pkginfo + set pkgcapsdeclared [dict merge [dict create $pkg $pkginfo] $pkgcapsdeclared] + dict for {cap cap_info} $caps { + set cap_pkgs [dict get $cap_info providers] + if {$pkg in $cap_pkgs} { + set posn [lsearch $cap_pkgs $pkg] + if {$posn >=0} { + #rewrite package list with pkg at head of list for this capability + set cap_pkgs [lreplace $cap_pkgs $posn $posn] + set cap_pkgs [list $pkg {*}$cap_pkgs] + dict set caps $cap providers $cap_pkgs + } + } + } + } + } + + #*** !doctools + #[list_end] + } + + +#*** !doctools +#[section Internal] + + namespace eval capsystem { + #*** !doctools + #[subsection {Namespace punk::cap::capsystem}] + #[para] Internal functions used to communicate between punk::cap and capability handlers + #[list_begin definitions] + proc get_caphandler_registry {capname} { + set ns [::punk::cap::capability_get_handler $capname]::capsystem + if {[namespace exists ${ns}]} { + if {[info command ${ns}::caphandler.registry] ne ""} { + if {[info object isa object ${ns}::caphandler.registry]} { + return ${ns}::caphandler.registry + } + } + } + return "" + } + #*** !doctools + #[list_end] + } +} + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::cap [namespace eval punk::cap { + variable version + variable pkg punk::cap + set version 0.1.0 + variable README.md [string map [list %pkg% $pkg %ver% $version] { + # punk capabilities system + ## pkg: %pkg% version: %ver% + + punk::cap base namespace + }] + return $version +}] +return + +#*** !doctools +#[manpage_end] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/caphandler-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/caphandler-0.1.0.tm new file mode 100644 index 00000000..8fdce944 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/caphandler-0.1.0.tm @@ -0,0 +1,52 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::cap::handlers::caphandler 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::cap::handlers::caphandler { + + + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::cap::handlers::caphandler [namespace eval punk::cap::handlers::caphandler { + variable pkg punk::cap::handlers::caphandler + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/scriptlibs-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/scriptlibs-0.1.0.tm new file mode 100644 index 00000000..8298ec18 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/scriptlibs-0.1.0.tm @@ -0,0 +1,52 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::cap::handlers::scriptlibs 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::cap::handlers::scriptlibs { + + + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::cap::handlers::scriptlibs [namespace eval punk::cap::handlers::scriptlibs { + variable pkg punk::cap::handlers::scriptlibs + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm new file mode 100644 index 00000000..2926b237 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/cap/handlers/templates-0.1.0.tm @@ -0,0 +1,766 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::cap::handlers::templates 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +package require punk::repo + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#register using: +# punk::cap::register_capabilityname templates ::punk::cap::handlers::templates + +#By convention and for consistency, we don't register here during package loading - but require the calling app to do it. +# (even if it tends to be done immediately after package require anyway) +# registering capability handlers can involve validating existing provider data and is best done explicitly as required. +# It is also possible for a capability handler to be registered to handle more than one capabilityname + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::cap::handlers::templates { + namespace eval capsystem { + #interfaces for punk::cap to call into + if {[info commands caphandler.registry] eq ""} { + punk::cap::class::interface_caphandler.registry create caphandler.registry + oo::objdefine caphandler.registry { + method pkg_register {pkg capname capdict caplist} { + #caplist may not be complete set - which somewhat reduces its utility here regarding any decisions based on the context of this capname/capdict (review - remove this arg?) + + # -- --- --- --- --- --- --- ---- --- + # validation of capdict + # -- --- --- --- --- --- --- ---- --- + if {![dict exists $capdict vendor]} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability, but is missing the 'vendor' key" + return 0 + } + if {![dict exists $capdict path] || ![dict exists $capdict pathtype]} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability, but is missing the 'path' or 'pathtype' key" + return 0 + } + set pathtype [dict get $capdict pathtype] + set vendor [dict get $capdict vendor] + set known_pathtypes [list adhoc currentproject_multivendor currentproject shellproject_multivendor shellproject module absolute] + if {$pathtype ni $known_pathtypes} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability, but 'pathtype' value '$pathtype' is not recognised. Known type: $known_pathtypes" + return 0 + } + + set path [dict get $capdict path] + + set cname [string map {. _} $capname] + + set multivendor_package_whitelist [list punk::mix::templates] + + + #for template pathtype module & shellproject* we can resolve whether it's within a project at registration time and store the projectbase rather than rechecking it each time the templates handler api is called + #for template pathtype absolute - we can do the same. + #There is a small chance for a long-running shell that a project is later created which makes the absolute path within a project - but it seems an unlikely case, and probably won't surprise the user that they need to relaunch the shell or reload the capsystem to see the change. + + #adhoc and currentproject* paths are relative to cwd - so no projectbase information can be stored at registration time. + #not all template item types will need projectbase information - as the item data may be self-contained within the template structure - + #but project_layout will need it - or at least need to know if there is no project - because project_layout data is never stored in the template folder structure directly. + switch -- $pathtype { + adhoc { + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + return 0 + } + set extended_capdict $capdict + dict set extended_capdict vendor $vendor + } + module { + set provide_statement [package ifneeded $pkg [package require $pkg]] + set tmfile [lindex $provide_statement end] + if {[interp issafe]} { + #default safe interp can't use file exists/normalize etc.. but safe interp may have a policy/alias set allowing file access to certain paths - so test if file exists is usable + if {[catch {file exists $tmfile} tm_exists]} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING (expected in most safe interps) - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability" + flush stderr + return 0 + } + } else { + set tm_exists [file exists $tmfile] + } + if {![file exists $tmfile]} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability" + flush stderr + return 0 + } + + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + } + set tmfolder [file dirname $tmfile] + #todo - handle wrapped or unwrapped tarjar files - in which case we have to adjust tmfolder appropriately + #set tpath [file normalize [file join $tmfile [dict get $capdict relpath]]] ;#relpath is relative to the tm *file* - not it's containing folder + + set projectinfo [punk::repo::find_repos $tmfolder] + set projectbase [dict get $projectinfo closest] + #store the projectbase even if it's empty string + set extended_capdict $capdict + set resolved_path [file join $tmfolder $path] + dict set extended_capdict resolved_path $resolved_path + dict set extended_capdict projectbase $projectbase + } + currentproject_multivendor { + #currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense + if {$pkg ni $multivendor_package_whitelist} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported" + return 0 + } + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + return 0 + } + + set extended_capdict $capdict + dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor? + } + currentproject { + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + return 0 + } + #verify that the relative path is within the relative path of a currentproject_multivendor tree + #todo - api for the _multivendor tree controlling package to validate + + + set extended_capdict $capdict + dict set extended_capdict vendor $vendor + } + shellproject { + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + return 0 + } + set shellbase [file dirname [file dirname [file normalize [set ::argv0]/__]]] ;#review + set projectinfo [punk::repo::find_repos $shellbase] + set projectbase [dict get $projectinfo closest] + + set extended_capdict $capdict + dict set extended_capdict vendor $vendor + dict set extended_capdict projectbase $projectbase + } + shellproject_multivendor { + #currently only intended for punk::templates - review if 3rd party _multivendor trees even make sense + if {$pkg ni $multivendor_package_whitelist} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported" + return 0 + } + if {[file pathtype $path] ne "relative"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path" + return 0 + } + set shellbase [file dirname [file dirname [file normalize [set ::argv0]/__]]] ;#review + set projectinfo [punk::repo::find_repos $shellbase] + set projectbase [dict get $projectinfo closest] + + set extended_capdict $capdict + dict set extended_capdict vendor $vendor + dict set extended_capdict projectbase $projectbase + } + absolute { + if {[file pathtype $path] ne "absolute"} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute" + return 0 + } + set normpath [file normalize $path] + if {!file exists $normpath} { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' which doesn't seem to exist" + return 0 + } + set projectinfo [punk::repo::find_repos $normpath] + set projectbase [dict get $projectinfo closest] + + #todo - verify no other provider has registered same absolute path - if sharing a project-external location is needed - they need their own subfolder + set extended_capdict $capdict + dict set extended_capdict resolved_path $normpath + dict set extended_capdict vendor $vendor + dict set extended_capdict projectbase $projectbase + } + default { + puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype" + return 0 + } + } + + # -- --- --- --- --- --- --- ---- --- + # update package internal data + # -- --- --- --- --- --- --- ---- --- + upvar ::punk::cap::handlers::templates::provider_info_$cname provider_info + + if {$capname ni $::punk::cap::handlers::templates::handled_caps} { + lappend ::punk::cap::handlers::templates::handled_caps $capname + } + if {![info exists provider_info] || $extended_capdict ni [dict get $provider_info $pkg]} { + #this checks for duplicates from the same provider - but not if other providers already added the path + #review - + dict lappend provider_info $pkg $extended_capdict + } + + + # -- --- --- --- --- --- --- ---- --- + # instantiation of api at punk::cap::handlers::templates::api_$capname + # -- --- --- --- --- --- --- ---- --- + set apicmd "::punk::cap::handlers::templates::api_$capname" + if {[info commands $apicmd] eq ""} { + punk::cap::handlers::templates::class::api create $apicmd $capname + } + + return 1 + } + method pkg_unregister {pkg} { + upvar ::punk::cap::handlers::templates::handled_caps hcaps + foreach capname $hcaps { + set cname [string map {. _} $capname] + upvar ::punk::cap::handlers::templates::provider_info_$cname my_provider_info + dict unset my_provider_info $pkg + #destroy api objects? + } + } + } + } + } + + variable handled_caps [list] + #variable pkg_folders [dict create] + + # -- --- --- --- --- --- --- + #handler api for clients of this capability - called via punk::cap::call_handler ?args? + # -- --- --- --- --- --- --- + namespace export * + namespace eval class { + oo::class create api { + #return a dict keyed on folder with source pkg as value + constructor {capname} { + variable capabilityname + variable cname + set cname [string map {. _} $capname] + set capabilityname $capname + } + method folders {args} { + set argd [punk::args::get_dict { + -startdir -default "" + *values -max 0 + } $args] + set opts [dict get $argd opts] + + set opt_startdir [dict get $opts -startdir] + if {$opt_startdir eq ""} { + set startdir [pwd] + } else { + if {[file pathtype $opt_startdir] eq "relative"} { + set startdir [file join [pwd] $opt_startdir] + } else { + set startdir $opt_startdir + } + } + + + variable capabilityname + variable cname + upvar ::punk::cap::handlers::templates::provider_info_$cname my_provider_info + package require punk::cap + set capinfo [punk::cap::capability $capabilityname] + # e.g {punk.templates {handler punk::mix::templates providers ::somepkg}} + + #use the order of pkgs as registered with punk::cap - may have been modified with punk::cap::promote_package/demote_package + set providerpkg [dict get $capinfo providers] + set folderdict [dict create] + + #maintain separate paths for different override levels - all keyed on vendor (or pseudo-vendor '_project') + set found_paths_adhoc [dict create] + set found_paths_module [dict create] + set found_paths_currentproject_multivendor [dict create] + set found_paths_currentproject [dict create] + set found_paths_shellproject_multivendor [dict create] + set found_paths_shellproject [dict create] + set found_paths_absolute [list] + + + foreach pkg $providerpkg { + set found_paths [list] + #set acceptedlist [dict get [punk::cap::pkgcap $pkg $capabilityname] accepted] + + foreach capdecl_extended [dict get $my_provider_info $pkg] { + #basic validation and extension was done when accepted - so we can trust the capdecl_extended dictionary has the right entries + + set path [dict get $capdecl_extended path] + set pathtype [dict get $capdecl_extended pathtype] + set vendor [dict get $capdecl_extended vendor] + # projectbase not present in capdecl_extended for all template pathtypes + if {$pathtype eq "adhoc"} { + #e.g (cwd)/templates + set targetpath [file join $startdir [dict get $capdecl_extended path]] + if {[file isdirectory $targetpath]} { + dict lappend found_paths_adhoc $vendor [list pkg $pkg path $targetpath pathtype $pathtype] + } + } elseif {$pathtype eq "module"} { + set module_projectroot [dict get $capdecl_extended projectbase] + dict lappend found_paths_module $vendor [list pkg $pkg path [dict get $capdecl_extended resolved_path] pathtype $pathtype projectbase $module_projectroot] + } elseif {$pathtype eq "currentproject_multivendor"} { + set searchbase $startdir + set pathinfo [punk::repo::find_repos $searchbase] + set pwd_projectroot [dict get $pathinfo closest] + if {$pwd_projectroot ne ""} { + set deckbase [file join $pwd_projectroot $path] + if {![file exists $deckbase]} { + continue + } + #add vendor/x folders first - earlier in list is lower priority + set vendorbase [file join $deckbase vendor] + if {[file isdirectory $vendorbase]} { + set vendorfolders [glob -nocomplain -dir $vendorbase -type d -tails *] + foreach vf $vendorfolders { + if {$vf ne "_project"} { + dict lappend found_paths_currentproject_multivendor $vf [list pkg $pkg path [file join $vendorbase $vf] pathtype $pathtype] + } + } + if {[file isdirectory [file join $vendorbase _project]]} { + dict lappend found_paths_currentproject_multivendor _project [list pkg $pkg path [file join $vendorbase _project] pathtype $pathtype] + } + } + set custombase [file join $deckbase custom] + if {[file isdirectory $custombase]} { + set customfolders [glob -nocomplain -dir $custombase -type d -tails *] + foreach cf $customfolders { + if {$cf ne "_project"} { + dict lappend found_paths_currentproject_multivendor $cf [list pkg $pkg path [file join $custombase $cf] pathtype $pathtype] + } + } + if {[file isdirectory [file join $custombase _project]]} { + dict lappend found_paths_currentproject_multivendor _project [list pkg $pkg path [file join $custombase _project] pathtype $pathtype] + } + } + } + } elseif {$pathtype eq "currentproject"} { + set searchbase $startdir + set pathinfo [punk::repo::find_repos $searchbase] + set pwd_projectroot [dict get $pathinfo closest] + if {$pwd_projectroot ne ""} { + #path relative to projectroot already validated by handler as being within a currentproject_multivendor tree + set targetfolder [file join $pwd_projectroot $path] + if {[file isdirectory $targetfolder]} { + dict lappend found_paths_currentproject $vendor [list pkg $pkg path $targetfolder pathtype $pathtype] + } + } + } elseif {$pathtype eq "shellproject_multivendor"} { + #review - consider also [info script] - but it can be empty if we just start a tclsh, load packages and start a repl + #set shellbase [file dirname [file dirname [file normalize [set ::argv0]/__]]] ;#review + #set pathinfo [punk::repo::find_repos $shellbase] + #set pwd_projectroot [dict get $pathinfo closest] + + set shell_projectroot [dict get $capdecl_extended projectbase] + if {$shell_projectroot ne ""} { + set deckbase [file join $shell_projectroot $path] + if {![file exists $deckbase]} { + continue + } + #add vendor/x folders first - earlier in list is lower priority + set vendorbase [file join $deckbase vendor] + if {[file isdirectory $vendorbase]} { + set vendorfolders [glob -nocomplain -dir $vendorbase -type d -tails *] + foreach vf $vendorfolders { + if {$vf ne "_project"} { + dict lappend found_paths_shellproject_multivendor $vf [list pkg $pkg path [file join $vendorbase $vf] pathtype $pathtype projectbase $shell_projectroot] + } + } + if {[file isdirectory [file join $vendorbase _project]]} { + dict lappend found_paths_shellproject_multivendor _project [list pkg $pkg path [file join $vendorbase _project] pathtype $pathtype projectbase $shell_projectroot] + } + } + set custombase [file join $deckbase custom] + if {[file isdirectory $custombase]} { + set customfolders [glob -nocomplain -dir $custombase -type d -tails *] + foreach cf $customfolders { + if {$cf ne "_project"} { + dict lappend found_paths_shellproject_multivendor $cf [list pkg $pkg path [file join $custombase $cf] pathtype $pathtype projectbase $shell_projectroot] + } + } + if {[file isdirectory [file join $custombase _project]]} { + dict lappend found_paths_shellproject_multivendor _project [list pkg $pkg path [file join $custombase _project] pathtype $pathtype projectbase $shell_projectroot] + } + } + + } + + } elseif {$pathtype eq "shellproject"} { + #review - consider also [info script] - but it can be empty if we just start a tclsh, load packages and start a repl + #set shellbase [file dirname [file dirname [file normalize [set ::argv0]/__]]] ;#review + #set pathinfo [punk::repo::find_repos $shellbase] + #set pwd_projectroot [dict get $pathinfo closest] + + set shell_projectroot [dict get $capdecl_extended projectbase] + if {$shell_projectroot ne ""} { + set targetfolder [file join $shell_projectroot $path] + if {[file isdirectory $targetfolder]} { + dict lappend found_paths_shellproject $vendor [list pkg $pkg path $targetfolder pathtype $pathtype projectbase $shell_projectroot] + } + } + } elseif {$pathtype eq "absolute"} { + #lappend found_paths [dict get $capdecl_extended resolved_path] + set abs_projectroot [dict get $capdecl_extended projectbase] + dict lappend found_paths_absolute $vendor [list pkg $pkg path [dict get $capdecl_extended resolved_path] pathtype $pathtype projectbase $abs_projectroot] + } + + } + + #todo - ensure vendor pkg capdict elements such source and allowupdates override any existing entry from a _multivendor pkg? + #currently relying on order in which loaded? review + #foreach pfolder $found_paths { + # dict set folderdict $pfolder [list source $pkg sourcetype package] + #} + } + + #add in order of preference low priority to high + + dict for {vendor pathinfolist} $found_paths_module { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] projectbase [dict get $pathinfo projectbase] vendor $vendor] + } + } + + #Templates within project of shell we launched with has lower priority than 'currentproject' (which depends on our CWD) + dict for {vendor pathinfolist} $found_paths_shellproject_multivendor { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] projectbase [dict get $pathinfo projectbase] vendor $vendor] + } + } + dict for {vendor pathinfolist} $found_paths_shellproject { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] projectbase [dict get $pathinfo projectbase] vendor $vendor] + } + } + + dict for {vendor pathinfolist} $found_paths_currentproject_multivendor { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] vendor $vendor] + } + } + dict for {vendor pathinfolist} $found_paths_currentproject { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] vendor $vendor] + } + } + dict for {vendor pathinfolist} $found_paths_absolute { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] projectbase [dict get $pathinfo projectbase] vendor $vendor] + } + } + #adhoc paths relative to cwd (or specified -startdir) can override any + dict for {vendor pathinfolist} $found_paths_adhoc { + foreach pathinfo $pathinfolist { + dict set folderdict [dict get $pathinfo path] [list source [dict get $pathinfo pkg] sourcetype package pathtype [dict get $pathinfo pathtype] vendor $vendor] + } + } + return $folderdict + } + method get_itemdict_projectlayouts {args} { + set argd [punk::args::get_dict { + *opts -anyopts 1 + #peek -startdir while allowing all other opts/vals to be verified down-the-line instead of here + -startdir -default "" + *values -maxvalues -1 + } $args] + set opt_startdir [dict get $argd opts -startdir] + + if {$opt_startdir eq ""} { + set searchbase [pwd] + } else { + set searchbase $opt_startdir + } + + set refdict [my get_itemdict_projectlayoutrefs {*}$args] + set layoutdict [dict create] + + set projectinfo [punk::repo::find_repos $searchbase] + set projectroot [dict get $projectinfo closest] + + dict for {layoutname refinfo} $refdict { + set templatepathtype [dict get $refinfo sourceinfo pathtype] + set sourceinfo [dict get $refinfo sourceinfo] + set path [dict get $refinfo path] + set reftail [file tail $path] + set atparts [split [file rootname $reftail] @] + #may be two @s if referencing a renamed layout override? + # e.g ref may be @vendor+punks+othersample@sample-0.1 or layoutalias-1.1@vendor+punk+othersample@sample-0.1 + #there must always be an @ before vendor or custom . There is either a template-name alias or empty string before this first @ + #trim off first @ part + set tailats [join [lrange $atparts 1 end] @] + # @ parts after the first are part of the path within the project_layouts structure + set subpathlist [split $tailats +] + if {[dict exists $refinfo sourceinfo projectbase]} { + #some template pathtypes refer to the projectroot from the template - not the cwd + set projectroot [dict get $refinfo sourceinfo projectbase] + } + + if {$projectroot ne ""} { + set layoutroot [file join $projectroot src/project_layouts] + set layoutfolder [file join $layoutroot {*}$subpathlist] + if {[file isdirectory $layoutfolder]} { + #todo - check if layoutname already in layoutdict append .ref path to list of refs that linked to this layout? + set layoutinfo [list path $layoutfolder basefolder $layoutroot sourceinfo $sourceinfo] + dict set layoutdict $layoutname $layoutinfo + } + } + } + return $layoutdict + } + method get_itemdict_projectlayoutrefs {args} { + set config { + -templatefolder_subdir "layout_refs"\ + -command_get_items_from_base {apply {{base} { + set matched_files [glob -nocomplain -dir $base -type f *@*.ref] + set items [list] + foreach rf $matched_files { + #puts stderr "--> $rf" + if {[string match ignore* $rf]} { + continue + } + #we silently skip .ref files that don't match - todo - more verification - and warn of invalid .refs? + if {[string match *@vendor+* $rf] || [string match *@custom+* $rf]} { + lappend items $rf + } + } + return $items + }}}\ + -command_get_item_name {apply {{vendor basefolder itempath} { + set itemtail [file rootname [file tail $itempath]] + set alias [lindex [split $itemtail @] 0] + if {$alias eq ""} { + set itemname [lindex [split $itemtail +] end] + } else { + set itemname $alias + } + if {$vendor ne "_project"} { + set itemname $vendor.$itemname + } + return $itemname + }}} + } + set arglist [concat $config $args] + my _get_itemdict {*}$arglist + } + method get_itemdict_scriptappwrappers {args} { + set config { + -templatefolder_subdir "utility/scriptappwrappers"\ + -command_get_items_from_base {apply {{base} { + + set matched_files [punk::path::treefilenames -dir $base *] + set wrappers [list] + foreach tf $matched_files { + if {[string match ignore* $tf]} { + continue + } + set ext [file extension $tf] + if {[string tolower $ext] in [list "" ".bat" ".cmd" ".sh" ".bash" ".pl" ".ps1" ".tcl"]} { + lappend wrappers $tf + } + } + return $wrappers + }}}\ + -command_get_item_name {apply {{vendor basefolder itempath} { + + set relativepath [punk::path::relative $basefolder $itempath] + set ftail [file tail $itempath] + set tname $relativepath + if {$vendor ne "_project"} { + set tname ${vendor}.$tname + } + return $tname + }}} + } + set arglist [concat $config $args] + my _get_itemdict {*}$arglist + } + method get_itemdict_moduletemplates {args} { + set config { + -templatefolder_subdir "modules"\ + -command_get_items_from_base {apply {{base} { + + set matched_files [punk::path::treefilenames -dir $base template_*.tm] + set tfiles [list] + foreach tf $matched_files { + if {[string match ignore* $tf]} { + continue + } + set ext [file extension $tf] + if {[string tolower $ext] in [list ".tm"]} { + #we will ignore any .tm files that don't have versions that tcl understands - but warn + #this reduces the cases we have to test later + set fname [file tail $tf] + lassign [split [punk::mix::cli::lib::split_modulename_version $fname]] mname ver + if {[catch {punk::mix::cli::lib::validate_modulename $mname} errM]} { + puts stderr "Invalid module name/version $tf - please rename with standard Tcl .tm module name and version (or leave out version)" + if {[string match *-* $mname]} { + puts stderr "Tcl module name cannot contain dash character - except between name and version" + } + } else { + lappend tfiles $tf + } + } + } + return $tfiles + + }}}\ + -command_get_item_name {apply {{vendor basefolder itempath} { + + set relativepath [punk::path::relative $basefolder $itempath] + set dirs [file dirname $relativepath] + if {$dirs eq "."} { + set dirs "" + } + set moduleprefix [join $dirs ::] + set ftail [file rootname [file tail $itempath]] + set tname [string range $ftail [string length template_] end] + if {$moduleprefix ne ""} { + set tname ${moduleprefix}::$tname + } + if {$vendor ne "_project"} { + set tname ${vendor}.$tname + } + return $tname + }}} + } + set arglist [concat $config $args] + my _get_itemdict {*}$arglist + } + + #shared algorithm for get_itemdict_* methods + #requires a -templatefolder_subdir indicating a directory within each template base folder in which to search + #and a file selection mechanism command -command_get_items_from_base + #and a name determining command -command_get_item_name + method _get_itemdict {args} { + set argd [punk::args::get_dict { + *proc -name _get_itemdict + *opts -anyopts 0 + -startdir -default "" + -templatefolder_subdir -optional 0 + -command_get_items_from_base -optional 0 + -command_get_item_name -optional 0 + -not -default "" -multiple 1 + *values -maxvalues -1 + globsearches -default * -multiple 1 + } $args] + set opts [dict get $argd opts] + set globsearches [dict get $argd values globsearches]; #note that in this case our globsearch won't reduce the machine's effort in scannning the filesystem - as we need to search on the renamed results + #puts stderr "=-=============>globsearches:$globsearches" + # -- --- --- --- --- --- --- --- --- + set opt_startdir [dict get $opts -startdir] + set opt_templatefolder_subdir [dict get $opts -templatefolder_subdir] + if {[file pathtype $opt_templatefolder_subdir] ne "relative"} { + error templates::_get_itemdict + } + # -- --- --- --- --- --- --- --- --- + set opt_command_get_items_from_base [dict get $opts -command_get_items_from_base] + set opt_command_get_item_name [dict get $opts -command_get_item_name] + set opt_not [dict get $opts -not] + # -- --- --- --- --- --- --- --- --- + set itembases [list] + #set tbasedict [punk::mix::base::lib::get_template_basefolders $opt_startdir] + set tbasedict [my folders -startdir $opt_startdir ] + #turn the dict into a list we can temporarily reverse sort while we expand the items from within each path + dict for {tbase folderinfo} $tbasedict { + lappend itembases [list basefolder [file join $tbase $opt_templatefolder_subdir] sourceinfo $folderinfo] + } + + set items [list] + set itemdict [dict create] + set seen_dict [dict create] + + #flip the priority order for layout folders encountered so we can set the trailing # dup/overridden indicators + foreach baseinfo [lreverse $itembases] { + set basefolder [dict get $baseinfo basefolder] + set sourceinfo [dict get $baseinfo sourceinfo] + set vendor [dict get $sourceinfo vendor] + #call the custom script from our caller which determines resultset of files we are interested in + set matches [{*}$opt_command_get_items_from_base $basefolder] + set items_here [dict create] ;#maintain a list keyed on name for sorting within this base only + foreach itempath $matches { + set itemname [{*}$opt_command_get_item_name $vendor $basefolder $itempath] + dict set items_here $itemname [list item $itempath baseinfo $baseinfo] + #lappend items [list item $itempath baseinfo $baseinfo] + } + set ordered_names [lsort [dict keys $items_here]] + #add to the outer items list + foreach nm $ordered_names { + set iteminfo [dict get $items_here $nm] + lappend items [list originalname $nm iteminfo $iteminfo] + } + } + + #append #n instance/duplicate name indicators based on cyling through entire list of found items + foreach itemrecord $items { + set oname [dict get $itemrecord originalname] + set iteminfo [dict get $itemrecord iteminfo] + set itempath [dict get $iteminfo item] + set baseinfo [dict get $iteminfo baseinfo] + if {![dict exists $seen_dict $oname]} { + dict set seen_dict $oname 1 + dict set itemdict $oname [list path $itempath {*}$baseinfo] ; #first seen of oname gets no number + } else { + set n [dict get $seen_dict $oname] + incr n + dict incr seen_dict $oname + dict set itemdict ${oname}#$n [list path $itempath {*}$baseinfo] + } + } + + #assertion path is first key of itemdict {callers are allowed to rely on it being first} + #assertion itemdict has keys path,basefolder,sourceinfo + set result [dict create] + set keys [lreverse [dict keys $itemdict]] + foreach k $keys { + set maybe "" + foreach g $globsearches { + if {[string match $g $k]} { + set maybe $k + break + } + } + set not "" + if {$maybe ne ""} { + foreach n $opt_not { + if {[string match $n $k]} { + set not $k + break + } + } + } + if {$maybe ne "" && $not eq ""} { + dict set result $k [dict get $itemdict $k] + } + + } + return $result + } + } + } + + + +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::cap::handlers::templates [namespace eval punk::cap::handlers::templates { + variable pkg punk::cap::handlers::templates + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm new file mode 100644 index 00000000..ed4b22e4 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm @@ -0,0 +1,2361 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::char 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::char 0 0.1.0] +#[copyright "2024"] +#[titledesc {character-set and unicode utilities}] [comment {-- Name section and table of contents description --}] +#[moddesc {character-set nad unicode}] [comment {-- Description at end of page heading --}] +#[require punk::char] +#[keywords module encodings] +#[description] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::char +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::char +#[list_begin itemized] +#[item] [package {Tcl 8.6}] + +# + +#*** !doctools +#[item] [package {overtype}] +#[para] - +#[item] [package {textblock}] +#[para] - +#[item] [package console] +#[para] - + +package require Tcl 8.6- +#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review +package require textutil +package require textutil::wcswidth + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +#Note that ansi escapes can begin with \033\[ (\u001b\[) or the single character "Control Sequence Introducer" 0x9b + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::char { + tcl::namespace::export * + + variable grapheme_widths [tcl::dict::create] + # -- -------------------------------------------------------------------------- + variable encmimens ;#namespace of mime package providing reversemapencoding and mapencoding functions + #tcllib mime requires tcl::chan::memchan,events,core and/or Trf + if {![catch {package require punk::encmime} errM]} { + set encmimens ::punk::encmime + } else { + package require mime + set encmimens ::mime + } + # -- -------------------------------------------------------------------------- + + variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously + variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common? + + #just the 7-bit ascii. use [page ascii] for the 8-bit layout + proc ascii {} {return { + 00 NUL 01 SOH 02 STX 03 ETX 04 EOT 05 ENQ 06 ACK 07 BEL + 08 BS 09 HT 0a LF 0b VT 0c FF 0d CR 0e SO 0f SI + 10 DLE 11 DC1 12 DC2 13 DC3 14 DC4 15 NAK 16 SYN 17 ETB + 18 CAN 19 EM 1a SUB 1b ESC 1c FS 1d GS 1e RS 1f US + 20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 ' + 28 ( 29 ) 2a * 2b + 2c , 2d - 2e . 2f / + 30 0 31 1 32 2 33 3 34 4 35 5 36 6 37 7 + 38 8 39 9 3a : 3b ; 3c < 3d = 3e > 3f ? + 40 @ 41 A 42 B 43 C 44 D 45 E 46 F 47 G + 48 H 49 I 4a J 4b K 4c L 4d M 4e N 4f O + 50 P 51 Q 52 R 53 S 54 T 55 U 56 V 57 W + 58 X 59 Y 5a Z 5b [ 5c \ 5d ] 5e ^ 5f _ + 60 ` 61 a 62 b 63 c 64 d 65 e 66 f 67 g + 68 h 69 i 6a j 6b k 6c l 6d m 6e n 6f o + 70 p 71 q 72 r 73 s 74 t 75 u 76 v 77 w + 78 x 79 y 7a z 7b { 7c | 7d } 7e ~ 7f DEL + }} + + #G0 character set + proc ascii2 {} { + set dict [asciidict2] + set out "" + set i 1 + append out " " + tcl::dict::for {k v} $dict { + #single chars are wrapped with \033(0 and \033(B ie total length 7 + if {[tcl::string::length $v] == 7} { + set v " $v " + } elseif {[tcl::string::length $v] == 2} { + set v "$v " + } elseif {[tcl::string::length $v] == 0} { + set v " " + } + append out "$k $v " + if {$i > 0 && $i % 8 == 0} { + set out [tcl::string::range $out 0 end-2] + append out \n " " + } + incr i + } + set out [tcl::string::trimright $out " "] + return $out + } + + + proc symbol {} { + tailcall page symbol + } + proc dingbats {} { + set out "" + append out [page dingbats] \n + set unicode_dict [charset_dictget Dingbats] + + append out " " + set i 1 + tcl::dict::for {k charinfo} $unicode_dict { + set char [tcl::dict::get $charinfo char] + if {[tcl::string::length $char] == 0} { + set displayv " " + } elseif {[tcl::string::length $char] == 1} { + set displayv " $char " + } else { + set displayv $char + } + append out "$k $displayv " + if {$i > 0 && $i % 8 == 0} { + set out [tcl::string::range $out 0 end-2] + append out \n " " + } + incr i + } + return $out + } + proc page_names {{search *}} { + set all_names [list] + set d [page_names_dict $search] + tcl::dict::for {k v} $d { + if {$k ni $all_names} { + lappend all_names $k + } + foreach m $v { + if {$m ni $all_names} { + lappend all_names $m + } + } + } + return [lsort $all_names] + } + proc page_names_help {{namesearch *}} { + set d [page_names_dict $namesearch] + + set out "" + tcl::dict::for {k v} $d { + append out "$k $v" \n + } + return [linesort $out] + } + proc page_names_dict {{search *}} { + if {![regexp {[?*]} $search]} { + set search "*$search*" + } + set encnames [encoding names] + foreach enc $encnames { + tcl::dict::set d $enc [list] + } + variable encmimens + set mimenames [array get ${encmimens}::reversemap] + tcl::dict::for {mname encname} $mimenames { + if {$encname in $encnames} { + set enclist [tcl::dict::get $d $encname] + if {$mname ni $enclist} { + tcl::dict::lappend d $encname $mname + } + } + } + foreach enc [lsort $encnames] { + set mime_enc [${encmimens}::mapencoding $enc] + if {$mime_enc ne ""} { + set enclist [tcl::dict::get $d $enc] + if {$mime_enc ni $enclist} { + tcl::dict::lappend d $enc $mime_enc + } + } + } + set dresult [tcl::dict::create] + if {$search ne "*"} { + tcl::dict::for {k v} $d { + if {[tcl::string::match -nocase $search $k] || ([lsearch -nocase $v $search]) >= 0} { + tcl::dict::set dresult $k $v + } + } + } else { + set dresult $d + } + return $dresult + } + proc page8 {encname args} { + tcl::dict::set args -cols 8 + tailcall page $encname {*}$args + } + proc page16 {encname args} { + tcl::dict::set args -cols 16 + tailcall page $encname {*}$args + } + + #This will not display for example, c0 glyphs for cp437 + # we could use the punk::ansi::cp437_map dict - but while that might be what some expect to see - it would probably be too much magic for this function - which is intended to align more with what Tcl's encoding convertfrom/to actually does. + # for nonprinting members of the page, 2 and 3 letter codes are used rather than unicode visualisation replacements or even unicode equivalent replacements known to be appropriate for the page + proc page {encname args} { + variable invalid + set encname [encname $encname] + set defaults [list\ + -range {0 256}\ + -cols 16\ + ] + set opts [tcl::dict::merge $defaults $args] + # -- --- --- --- --- --- --- --- --- + set cols [tcl::dict::get $opts -cols] + # -- --- --- --- --- --- --- --- --- + + set d_bytedisplay [basedict_display] + + #set d_ascii [pagedict_raw ascii] + set d_ascii [basedict] + set d_asciiposn [lreverse $d_ascii] ;#values should be unique except for "???". We are mainly interested in which ones have display-names e.g cr csi + #The results of this are best seen by comparing the ebcdic and ascii pages + + set d_page [pagedict_raw $encname] + + set out "" + set i 1 + append out " " + tcl::dict::for {k rawchar} $d_page { + set num [expr {"0x$k"}] + #see if ascii equivalent exists and has a name + if {$rawchar eq $invalid} { + set displayv "$invalid" + } else { + set bytedisplay "" + if {[tcl::dict::exists $d_asciiposn $rawchar]} { + set asciiposn [tcl::dict::get $d_asciiposn $rawchar] + set bytedisplay [tcl::dict::get $d_bytedisplay $asciiposn] + } + if {$bytedisplay eq $invalid} { + # + set displayv " $rawchar " + } else { + set displaylen [tcl::string::length $bytedisplay] + if {$displaylen == 2} { + set displayv "$bytedisplay " + } elseif {$displaylen == 3} { + set displayv $bytedisplay + } else { + if {[tcl::string::length $rawchar] == 0} { + set displayv " " + } else { + #presumed 1 + set displayv " $rawchar " + } + } + } + } + + append out "$k $displayv " + if {$i > 0 && $i % $cols == 0} { + set out [tcl::string::range $out 0 end-2] + append out \n " " + } + incr i + } + set out [tcl::string::trimright $out " "] + return $out + } + + proc pagechar1 {page num} { + set encpage [encname $page] + encoding convertfrom $encpage [format %c $num] + } + + proc pagechar {page num} { + set encpage [encname $page] + + set ch [format %c $num] + if {[decodable $ch $encpage]} { + set outchar [encoding convertfrom $encpage $ch] + } else { + #here we will use \0xFFFD instead of our replacement string ??? - as the name pagechar implies always returning a single character. REVIEW. + set outchar $::punk::char::invalid_display_char + } + return $outchar + } + proc pagechar_info {page num} { + set ch [format %c $num] + set h [format %04x $num] + set encpage [encname $page] + if {[decodable $ch $encpage]} { + set outchar [encoding convertfrom $encpage $ch] + } else { + error "pagechar_info: $h not decodable from $encpage" + } + package require punk::console + puts -nonewline stdout \033\[s;flush stdout + lassign [punk::console::get_cursor_pos_list] _row1 col1 + puts -nonewline stdout "$outchar";flush stdout + lassign [punk::console::get_cursor_pos_list] _row2 col2 + puts -nonewline stdout "\033\[u";flush stdout + return "$col1 -> $col2" + } + + proc pagebyte {page num} { + set encpage [encname $page] + + set ch [format %c $num] + if {[decodable $ch $encpage]} { + #set outchar [encoding convertto $encpage [format %c $num]] + set outchar [format %c $num] + } else { + set outchar $::punk::char::invalid_display_char + } + return $outchar + } + + proc all_pages {} { + set out "" + set mimenamesdict [page_names_dict] + foreach encname [encoding names] { + if {[tcl::dict::exists $mimenamesdict $encname]} { + set alt "([tcl::dict::get $mimenamesdict $encname])" + } else { + set alt "" + } + append out "$encname $alt" \n + append out [page $encname] + } + return $out + } + + proc encname {encoding_name_or_alias} { + set encname $encoding_name_or_alias + if {[lsearch -nocase [page_names] $encname] < 0} { + error "Unknown encoding '$encname' - use 'punk::char::page_names' to see valid encoding names on this system" + } + variable encmimens + if {$encname ni [encoding names]} { + set encname [${encmimens}::reversemapencoding $encname] + } + return $encname + } + + proc pagedict_raw {encname} { + variable invalid ;# ="???" + set encname [encname $encname] + set d [tcl::dict::create] + for {set i 0} {$i < 256} {incr i} { + set k [format %02x $i] + #tcl::dict::set d $k [encoding convertfrom $encname [format %c $i]] + set ch [format %c $i] ; + #jmn + if {[decodable $ch $encname]} { + #set encchar [encoding convertto $encname $ch] + #tcl::dict::set d $k [encoding convertfrom $encchar] + tcl::dict::set d $k [encoding convertfrom $encname $ch] + } else { + tcl::dict::set d $k $invalid ;#use replacement so we can detect difference from actual "?" + } + } + return $d + } + proc asciidict {} { + variable invalid + set d [tcl::dict::create] + set a128 [asciidict128] + for {set i 0} {$i < 256} {incr i} { + set k [format %02x $i] + if {$i <= 127} { + tcl::dict::set d $k [tcl::dict::get $a128 $k] + } else { + # + tcl::dict::set d $k $invalid + } + + if {$i <=32} { + #no point emitting the lower control characters raw to screen - emit the short-names defined in the 'ascii' proc + tcl::dict::set d $k [tcl::dict::get $a128 $k] + } else { + if {$i == 0x9b} { + tcl::dict::set d $k CSI ;#don't emit the ansi 'Control Sequence Introducer' - or it will be interpreted by the console and affect the layout. + } else { + tcl::dict::set d $k [format %c $i] + } + } + } + return $d + } + + proc basedict_display {} { + set d [tcl::dict::create] + set a128 [asciidict128] + for {set i 0} {$i < 256} {incr i} { + set k [format %02x $i] + if {$i <=32} { + #no point emitting the lower control characters raw to screen - emit the short-names defined in the 'ascii' proc + tcl::dict::set d $k [tcl::dict::get $a128 $k] + } else { + if {$i == 0x9b} { + tcl::dict::set d $k CSI ;#don't emit the ansi 'Control Sequence Introducer' - or it will be interpreted by the console and affect the layout. + } elseif {$i == 0x9c} { + tcl::dict::set d $k OSC + } else { + #tcl::dict::set d $k [encoding convertfrom [encoding system] [format %c $i]] + #don't use encoding convertfrom - we want the value independent of the current encoding system. + tcl::dict::set d $k [format %c $i] + } + } + } + return $d + } + proc basedict_encoding_system {} { + #result depends on 'encoding system' currently in effect + set d [tcl::dict::create] + for {set i 0} {$i < 256} {incr i} { + set k [format %02x $i] + tcl::dict::set d $k [encoding convertfrom [encoding system] [format %c $i]] + } + return $d + } + + proc basedict {} { + #this gives same result independent of current value of 'encoding system' + set d [tcl::dict::create] + for {set i 0} {$i < 256} {incr i} { + set k [format %02x $i] + tcl::dict::set d $k [format %c $i] + } + return $d + } + proc pagedict {pagename args} { + variable charsets + set encname [encname $pagename] + set defaults [list\ + -range {0 255}\ + -charset ""\ + ] + set opts [tcl::dict::merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- + set range [tcl::dict::get $opts -range] + set charset [tcl::dict::get $opts -charset] + # -- --- --- --- --- --- --- --- --- --- + if {$charset ne ""} { + if {$charset ni [charset_names]} { + error "unknown charset '$charset' - use 'charset_names' to get list" + } + set setinfo [tcl::dict::get $charsets $charset] + set ranges [tcl::dict::get $setinfo ranges] + set charset_dict [tcl::dict::create] + foreach r $ranges { + set start [tcl::dict::get $r start] + set end [tcl::dict::get $r end] + #set charset_dict [tcl::dict::merge $charset_dict [char_range_dict $start $end]] + break + } + + } else { + set start [lindex $range 0] + set end [lindex $range 1] + } + + set d [tcl::dict::create] + for {set i $start} {$i <= $end} {incr i} { + set k [format %02x $i] + tcl::dict::set d $k [encoding convertfrom $encname [format %c $i]] + } + return $d + } + + #todo - benchmark peformance - improve punk pipeline + proc asciidict128 {} { + regexp -all -inline {\S+} [concat {*}[linelist -line trimleft [ascii]]] + } + proc _asciidict128 {} { + .= ascii |> .=> linelist -line trimleft |> .=* concat |> {regexp -all -inline {\S+} $data} + } + + #review - use terminal to display actual supported DEC specials vs using dict at: punk::ansi::map_special_graphics which maps to known unicode equivalents + proc asciidict2 {} { + set d [tcl::dict::create] + tcl::dict::for {k v} [basedict_display] { + if {[tcl::string::length $v] == 1} { + set num [expr {"0x$k"}] + #tcl::dict::set d $k "\033(0[subst \\u00$k]\033(B" + tcl::dict::set d $k "\033(0[format %c $num]\033(B" + } else { + tcl::dict::set d $k $v + } + } + return $d + } + + #-- --- --- --- --- --- --- --- + # encoding convertfrom & encoding convertto can be somewhat confusing to think about. (Need to think in reverse.) + # e.g encoding convertto dingbats will output something that doesn't look dingbatty on screen. + #-- --- --- --- --- --- --- --- + #must use Tcl instead of tcl (at least for 8.6) + if {![package vsatisfies [package present Tcl] 8.7-]} { + proc encodable "s {enc [encoding system]}" { + set encname [encname $enc] + if {($encname eq "ascii")} { + #8.6 fails to round-trip convert 0x7f del character despite it being in the ascii range (review Why?? what else doesn't round-trip but should?) + #just strip it out of the string as we are only after a boolean answer and if s is only a single del char empty string will result in true + set s [tcl::string::map [list [format %c 0x7f] ""] $s] + } + string eq $s [encoding convertfrom $encname [encoding convertto $encname $s]] + } + #note also that tcl8.6 has anomalies with how it handles some unassigned codepoints + # e.g unassigned codes in the middle of a codepage may appear to be encodable&decodable in a round trip whereas undefined codepoints at the end may get the replacement character defined in the tcl encodings dir (usually the 3f char: "?") + proc decodable "s {enc [encoding system]}" { + set encname [encname $enc] + #review + string eq $s [encoding convertto $encname [encoding convertfrom $encname $s]] + } + } else { + proc encodable "s {enc [encoding system]}" { + set encname [encname $enc] + string eq $s [encoding convertfrom $encname [encoding convertto $encname $s]] + } + proc decodable "s {enc [encoding system]}" { + set encname [encname $enc] + string eq $s [encoding convertto $encname [encoding convertfrom $encname $s]] + } + } + #-- --- --- --- --- --- --- --- + proc test_japanese {{encoding jis0208}} { + #A very basic test of 2char encodings such as jis0208 + set yatbun 日本 ;# encoding convertfrom jis0208 F|K\\ + lassign [split $yatbun] yat bun + puts "original yatbun ${yat} ${bun}" + set eyat [encoding convertto $encoding $yat] + set ebun [encoding convertto $encoding $bun] + puts "$encoding encoded: ${eyat} ${ebun}" + puts "reencoded: [encoding convertfrom $encoding $eyat] [encoding convertfrom $encoding $ebun]" + return $yatbun + } + proc test_grave {} { + set g [format %c 0x300] + puts stdout "Testing console display of grave accented a in between letters x and y - accent should combine over the top of the letter a." + puts stdout "Apparent width should theoretically be 1 console-column" + package require punk::console + puts stdout "# -- --- --- ---" + puts -nonewline "xa${g}z";set cursorposn [punk::console::get_cursor_pos] + puts stdout \n + puts stdout "cursor position immediately after outputing 4 bytes (expecting 3 glyphs): $cursorposn" + puts stdout "# -- --- --- ---" + puts -nonewline "xyz";set cursorposn [punk::console::get_cursor_pos] + puts stdout \n + puts stdout "cursor position immediately after outputing 3 bytes (xyz): $cursorposn" + } + proc test_farmer {} { + #an interesting article re grapheme clustering problems in terminals https://mitchellh.com/writing/grapheme-clusters-in-terminals + #(similar to the problem with grave accent rendering width that the test_grave proc is written for) + # -- --- --- --- --- + #These pasted glyphs can display in console even when the unicode versions don't (tcl 8.6 limited to 65533/FFFD ?) + upvar farmer1_paste test_farmer1 + upvar farmer2_paste test_farmer2 + set test_farmer1 🧑‍🌾 ;#contains zero-width joiner between + set test_farmer2 🧑🌾 + puts "pasted farmer1 exporting as var farmer1_paste: $test_farmer1" + puts "pasted farmer2 exporting as var farmer2_paste: $test_farmer2" + # -- --- --- --- --- + + + set farmer1 "\U0001f9d1\U0000200d\U0001f33e" + set farmer2 "\U0001f9d1\U0001f33e" + puts stdout "farmer1 with zero-width joiner, codes: \\U0001f9d1\\U0000200d\\U0001f33e : $farmer1" + puts stdout "farmer2 with no joiner codes: \\U0001f9d1\\U001f33e : $farmer2" + + package require punk::console + puts stdout "#2--5---9---C---" + puts -nonewline stdout \033\[2K\033\[1G ;#2K erase line 1G cursor at col1 + puts -nonewline "${farmer1}";set cursorposn [punk::console::get_cursor_pos] + puts stdout \n + puts stdout "cursor position immediately after outputing farmer1 (expecting 1 glyph 2 wide) : $cursorposn" + puts stdout "#2--5---9---C---" + puts -nonewline "${farmer2}";set cursorposn [punk::console::get_cursor_pos] + puts stdout \n + puts stdout "cursor position immediately after outputing farmer2 (expecting 2 glyphs 4 wide in total): $cursorposn" + + return [list $farmer1 $farmer2] + } + + #G0 Sets Sequence G1 Sets Sequence Meaning + #ESC ( A ESC ) A United Kingdom Set + #ESC ( B ESC ) B ASCII Set + #ESC ( 0 ESC ) 0 Special Graphics + #ESC ( 1 ESC ) 1 Alternate Character ROM Standard Character Set + #ESC ( 2 ESC ) 2 Alternate Character ROM Special Graphic + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # Unicode character sets - some hardcoded - some loadable from data files + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + variable charinfo [tcl::dict::create] + variable charsets [tcl::dict::create] + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # Aggregate character sets (ones that pick various ranges from underlying unicode ranges) + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "WGL4" [list altname "Windows Glyph List 4" ranges [list\ + {start 0 end 127 name "basic latin"}\ + {start 128 end 255 name "latin-1 supplement"}\ + {start 256 end 383 name "Latin Extended-A"}\ + {start 402 end 402 name "subset Latin Extended-B"}\ + {start 506 end 511 name "subset Latin Extended-B"}\ + {start 710 end 711 name "subset Spacing Modifier Letters"}\ + {start 713 end 713 name "subset Spacing Modifier Letters"}\ + {start 728 end 733 name "subset Spacing Modifier Letters"}\ + {start 900 end 906 name "subset Greek"}\ + {start 908 end 908 name "subset Greek"}\ + {start 910 end 974 name "subset Greek"}\ + {start 1024 end 1119 name "subset Cyrillic"}\ + {start 1168 end 1169 name "subset Cyrillic"}\ + {start 7808 end 7813 name "subset Latin Extended Additional"}\ + {start 7922 end 7923 name "subset Latin Extended Additional"}\ + {start 8211 end 8213 name "subset General Punctuation"}\ + {start 8215 end 8222 name "subset General Punctuation"}\ + {start 8224 end 8226 name "subset General Punctuation"}\ + {start 8230 end 8230 name "subset General Punctuation"}\ + {start 8240 end 8240 name "subset General Punctuation"}\ + {start 8242 end 8243 name "subset General Punctuation"}\ + ] description "Microsoft WGL4 Repertoire" settype "other"] + + + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #The base page 0-256 8bit range - values don't have specific characters or descriptions - as they are codepage dependent + #we will fill this here for completeness - but with placeholders + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "8bit" [list ranges [list {start 0 end 127 name ascii} {start 128 end 255 name 8bit-ascii}] description "8bit extended ascii range" settype "other"] + for {set i 0} {$i < 256} {incr i} { + tcl::dict::set charinfo $i [list desc "codepage-dependent" short "byte_[format %02x $i]"] + } + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # Unicode ranges + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "greek" [list ranges [list {start 880 end 1023} {start 7936 end 8191}] description "Greek and Coptic" settype "other"] + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "Block Elements" [list ranges [list {start 9600 end 9631}] description "Block Elements" settype "other"] + tcl::dict::set charinfo 9600 [list desc "Upper Half Block" short "blocke_up_half"] + tcl::dict::set charinfo 9601 [list desc "Lower One Eighth Block" short "blocke_lw_1_8th"] + tcl::dict::set charinfo 9602 [list desc "Lower One Quarter Block" short "blocke_lw_1_qtr"] + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "Dingbats" [list ranges [list {start 9984 end 10175 }] description "Unicode Dingbats" settype "tcl_supplemented"] + tcl::dict::set charinfo 9984 [list desc "Black Safety Scissors" short "dingbats_black_safety_scissors"] + #... + tcl::dict::set charinfo 10175 [list desc "Double Curly Loop" short "dingbats_double_curly_loop"] + + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #variation selectors 0xFe01 - 0xFE0F + tcl::dict::set charsets "Variation Selectors" [list ranges [list {start 65024 end 65039}] description "Variation Selectors" note "combining character with previous char - variant glyph display" settype "tcl_supplemented"] + tcl::dict::set charinfo 65024 [list desc "Variation Selector-1" short "VS1"] + tcl::dict::set charinfo 65025 [list desc "Variation Selector-2" short "VS2"] + tcl::dict::set charinfo 65026 [list desc "Variation Selector-3" short "VS3"] + tcl::dict::set charinfo 65027 [list desc "Variation Selector-4" short "VS4"] + tcl::dict::set charinfo 65027 [list desc "Variation Selector-5" short "VS5"] + tcl::dict::set charinfo 65029 [list desc "Variation Selector-6" short "VS6"] + tcl::dict::set charinfo 65030 [list desc "Variation Selector-7" short "VS7"] + tcl::dict::set charinfo 65031 [list desc "Variation Selector-8" short "VS8"] + tcl::dict::set charinfo 65032 [list desc "Variation Selector-9" short "VS9"] + tcl::dict::set charinfo 65033 [list desc "Variation Selector-10" short "VS10"] + tcl::dict::set charinfo 65034 [list desc "Variation Selector-11" short "VS11"] + tcl::dict::set charinfo 65035 [list desc "Variation Selector-12" short "VS12"] + tcl::dict::set charinfo 65036 [list desc "Variation Selector-13" short "VS13"] + tcl::dict::set charinfo 65037 [list desc "Variation Selector-14" short "VS14"] + tcl::dict::set charinfo 65038 [list desc "Variation Selector-15 text variation" short "VS15"] ;#still an image - just more suitable for text-presentation e.g word-processing doc + tcl::dict::set charinfo 65039 [list desc "Variation Selector-16 emoji variation" short "VS16"] + + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # emoticons https://www.unicode.org/charts/PDF/U1F600.pdf + tcl::dict::set charsets "Emoticons" [list ranges [list {start 128512 end 128591}] description "Emoticons" settype "tcl_supplemented"] + tcl::dict::set charinfo 128512 [list desc "Grinning Face" short "emoticon_gface"] + tcl::dict::set charinfo 128513 [list desc "Grinning Face with Smiling Eyes" short "emoticon_gface_smile_eyes"] + tcl::dict::set charinfo 128514 [list desc "Face with Tears of Joy" short "emoticon_face_tears_joy"] + + #todo + tcl::dict::set charinfo 128590 [list desc "Person with Pouting Face" short "emoticon_person_pout"] + + # -- -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + tcl::dict::set charsets "Box Drawing" [list ranges [list {start 9472 end 9599}] description "Box Drawing" settype "tcl_supplemented"] + tcl::dict::set charinfo 9472 [list desc "Box Drawings Light Horizontal" short "boxd_lhz"] + tcl::dict::set charinfo 9473 [list desc "Box Drawings Heavy Horizontal" short "boxd_hhz"] + tcl::dict::set charinfo 9474 [list desc "Box Drawings Light Vertical" short "boxd_lv"] + tcl::dict::set charinfo 9475 [list desc "Box Drawings Heavy Vertical" short "boxd_hv"] + tcl::dict::set charinfo 9476 [list desc "Box Drawings Light Triple Dash Horizontal" short "boxd_ltdshhz"] + tcl::dict::set charinfo 9477 [list desc "Box Drawings Heavy Triple Dash Horizontal" short "boxd_htdshhz"] + tcl::dict::set charinfo 9478 [list desc "Box Drawings Light Triple Dash Vertical" short "boxd_ltdshv"] + tcl::dict::set charinfo 9479 [list desc "Box Drawings Heavy Triple Dash Vertical" short "boxd_htdshv"] + tcl::dict::set charinfo 9480 [list desc "Box Drawings Light Quadruple Dash Horizontal" short "boxd_lqdshhz"] + tcl::dict::set charinfo 9481 [list desc "Box Drawings Heavy Quadruple Dash Horizontal" short "boxd_hqdshhz"] + tcl::dict::set charinfo 9482 [list desc "Box Drawings Light Quadruple Dash Vertical" short "boxd_lqdshv"] + tcl::dict::set charinfo 9483 [list desc "Box Drawings Heavy Quadruple Dash Vertical" short "boxd_hqdshv"] + tcl::dict::set charinfo 9484 [list desc "Box Drawings Light Down and Right" short "boxd_ldr"] + tcl::dict::set charinfo 9485 [list desc "Box Drawings Down Light and Right Heavy" short "boxd_dlrh"] + tcl::dict::set charinfo 9486 [list desc "Box Drawings Down Heavy and Right Light" short "boxd_dhrl"] + tcl::dict::set charinfo 9487 [list desc "Box Drawings Heavy Down and Right" short "boxd_hdr"] + tcl::dict::set charinfo 9488 [list desc "Box Drawings Light Down and Left" short "boxd_ldl"] + tcl::dict::set charinfo 9489 [list desc "Box Drawings Down Light and Left Heavy" short "boxd_dllh"] + tcl::dict::set charinfo 9490 [list desc "Box Drawings Down Heavy and Left Light" short "boxd_dhll"] + tcl::dict::set charinfo 9491 [list desc "Box Drawings Heavy Down and Left" short "boxd_hdl"] + tcl::dict::set charinfo 9492 [list desc "Box Drawings Light Up and Right" short "boxd_lur"] + tcl::dict::set charinfo 9493 [list desc "Box Drawings Up Light and Right Heavy" short "boxd_ulrh"] + tcl::dict::set charinfo 9494 [list desc "Box Drawings Up Heavy and Right Light" short "boxd_uhrl"] + tcl::dict::set charinfo 9495 [list desc "Box Drawings Heavy Up and Right" short "boxd_hur"] + tcl::dict::set charinfo 9496 [list desc "Box Drawings Light Up and Left" short "boxd_lul"] + tcl::dict::set charinfo 9497 [list desc "Box Drawings Up Light and Left Heavy" short "boxd_ullh"] + tcl::dict::set charinfo 9498 [list desc "Box Drawings Up Heavy and Left Light" short "boxd_uhll"] + tcl::dict::set charinfo 9499 [list desc "Box Drawings Heavy Up and Left" short "boxd_hul"] + tcl::dict::set charinfo 9500 [list desc "Box Drawings Light Vertical and Right" short "boxd_lvr"] + tcl::dict::set charinfo 9501 [list desc "Box Drawings Vertical Light and Right Heavy" short "boxd_vlrh"] + tcl::dict::set charinfo 9502 [list desc "Box Drawings Up Heavy and Right Down Light" short "boxd_uhrdl"] + tcl::dict::set charinfo 9503 [list desc "Box Drawings Down Heavy and Right Up Light" short "boxd_dhrul"] + tcl::dict::set charinfo 9504 [list desc "Box Drawings Vertical Heavy and Right Light" short "boxd_vhrl"] + tcl::dict::set charinfo 9505 [list desc "Box Drawings Down Light and Right Up Heavy" short "boxd_dlruh"] + tcl::dict::set charinfo 9506 [list desc "Box Drawings Up Light and Right Down Heavy" short "boxd_ulrdh"] + tcl::dict::set charinfo 9507 [list desc "Box Drawings Heavy Vertical and Right" short "boxd_hvr"] + tcl::dict::set charinfo 9508 [list desc "Box Drawings Light Vertical and Left" short "boxd_lvl"] + tcl::dict::set charinfo 9509 [list desc "Box Drawings Vertical Light and Left Heavy" short "boxd_vllh"] + tcl::dict::set charinfo 9510 [list desc "Box Drawings Up Heavy and Let Down Light" short "boxd_uhldl"] + tcl::dict::set charinfo 9511 [list desc "Box Drawings Down Heavy and Left Up Light" short "boxd_dhlul"] + tcl::dict::set charinfo 9512 [list desc "Box Drawings Vertical Heavy and Left Light" short "boxd_vhll"] + tcl::dict::set charinfo 9513 [list desc "Box Drawings Down Light and left Up Heavy" short "boxd_dlluh"] + tcl::dict::set charinfo 9514 [list desc "Box Drawings Up Light and Left Down Heavy" short "boxd_ulldh"] + tcl::dict::set charinfo 9515 [list desc "Box Drawings Heavy Vertical and Left" short "boxd_hvl"] + tcl::dict::set charinfo 9516 [list desc "Box Drawings Light Down and Horizontal" short "boxd_ldhz"] + tcl::dict::set charinfo 9517 [list desc "Box Drawings Left Heavy and Right Down Light" short "boxd_lhrdl"] + tcl::dict::set charinfo 9518 [list desc "Box Drawings Right Heavy and Left Down Light" short "boxd_rhldl"] + tcl::dict::set charinfo 9519 [list desc "Box Drawings Down Light and Horizontal Heavy" short "boxd_dlhzh"] + tcl::dict::set charinfo 9520 [list desc "Box Drawings Down Heavy and Horizontal Light" short "boxd_dhhzl"] + tcl::dict::set charinfo 9521 [list desc "Box Drawings Right Light and Left Down Heavy" short "boxd_rlldh"] + tcl::dict::set charinfo 9522 [list desc "Box Drawings Left Light and Right Down Heavy" short "boxd_llrdh"] + tcl::dict::set charinfo 9523 [list desc "Box Drawings Heavy Down and Horizontal" short "boxd_hdhz"] + tcl::dict::set charinfo 9524 [list desc "Box Drawings Light Up and Horizontal" short "boxd_luhz"] + tcl::dict::set charinfo 9525 [list desc "Box Drawings Left Heavy and Right Up Light" short "boxd_lhrul"] + tcl::dict::set charinfo 9526 [list desc "Box Drawings Right Heavy and Left Up Light" short "boxd_rhlul"] + tcl::dict::set charinfo 9527 [list desc "Box Drawings Up Light and Horizontal Heavy" short "boxd_ulhzh"] + tcl::dict::set charinfo 9528 [list desc "Box Drawings Up Heavy and Horizontal Light" short "boxd_uhhzl"] + tcl::dict::set charinfo 9529 [list desc "Box Drawings Right Light and Left Up Heavy" short "boxd_rlluh"] + tcl::dict::set charinfo 9530 [list desc "Box Drawings Left Light and Right Up Heavy" short "boxd_llruh"] + tcl::dict::set charinfo 9531 [list desc "Box Drawings Heavy Up and Horizontal" short "boxd_huhz"] + tcl::dict::set charinfo 9532 [list desc "Box Drawings Light Vertical and Horizontal" short "boxd_lvhz"] + tcl::dict::set charinfo 9533 [list desc "Box Drawings Left Heavy and Right Vertical Light" short "boxd_lhrvl"] + tcl::dict::set charinfo 9534 [list desc "Box Drawings Right Heavy and Left Vertical Light" short "boxd_rhlvl"] + tcl::dict::set charinfo 9535 [list desc "Box Drawings Vertical Light and Horizontal Heavy" short "boxd_vlhzh"] + tcl::dict::set charinfo 9536 [list desc "Box Drawings Up Heavy and Down Horizontal Light" short "boxd_uhdhzl"] + tcl::dict::set charinfo 9537 [list desc "Box Drawings Down Heavy and Up Horizontal Light" short "boxd_dhuhzl"] + tcl::dict::set charinfo 9538 [list desc "Box Drawings Vertical Heavy and Horizontal Light" short "boxd_vhhzl"] + tcl::dict::set charinfo 9539 [list desc "Box Drawings Left Up Heavy and Right Down Light" short "boxd_luhrdl"] + tcl::dict::set charinfo 9540 [list desc "Box Drawings Right Up Heavy and Left Down Light" short "boxd_ruhldl"] + tcl::dict::set charinfo 9541 [list desc "Box Drawings Left Down Heavy and Right Up Light" short "boxd_ldhrul"] + tcl::dict::set charinfo 9542 [list desc "Box Drawings Right Down Heavy and Left Up Light" short "boxd_rdhlul"] + tcl::dict::set charinfo 9543 [list desc "Box Drawings Down Light and Up Horizontal Heavy" short "boxd_dluhzh"] + tcl::dict::set charinfo 9544 [list desc "Box Drawings Up Light and Down Horizontal Heavy" short "boxd_dldhzh"] + tcl::dict::set charinfo 9545 [list desc "Box Drawings Right Light and Left Vertical Heavy" short "boxd_rllvh"] + tcl::dict::set charinfo 9546 [list desc "Box Drawings Left Light and Right Vertical Heavy" short "boxd_llrvh"] + tcl::dict::set charinfo 9547 [list desc "Box Drawings Heavy Vertical and Horizontal" short "boxd_hvhz"] + tcl::dict::set charinfo 9548 [list desc "Box Drawings Light Double Dash Horizontal" short "boxd_lddshhz"] + tcl::dict::set charinfo 9549 [list desc "Box Drawings Heavy Double Dash Horizontal" short "boxd_hddshhz"] + tcl::dict::set charinfo 9550 [list desc "Box Drawings Light Double Dash Vertical" short "boxd_lddshv"] + tcl::dict::set charinfo 9551 [list desc "Box Drawings Heavy Double Dash Vertical" short "boxd_hddshv"] + tcl::dict::set charinfo 9552 [list desc "Box Drawings Double Horizontal" short "boxd_dhz"] + tcl::dict::set charinfo 9553 [list desc "Box Drawings Double Vertical" short "boxd_dv"] + tcl::dict::set charinfo 9554 [list desc "Box Drawings Down Single and Right Double" short "boxd_dsrd"] + tcl::dict::set charinfo 9555 [list desc "Box Drawings Down Double and Right Single" short "boxd_ddrs"] + tcl::dict::set charinfo 9556 [list desc "Box Drawings Double Down and Right" short "boxd_ddr"] + tcl::dict::set charinfo 9557 [list desc "Box Drawings Down Single and Left Double" short "boxd_dsld"] + tcl::dict::set charinfo 9558 [list desc "Box Drawings Down Double and Left Single" short "boxd_ddls"] + tcl::dict::set charinfo 9559 [list desc "Box Drawings Double Down and Left" short "boxd_ddl"] + tcl::dict::set charinfo 9560 [list desc "Box Drawings Up Single and Right Double" short "boxd_usrd"] + tcl::dict::set charinfo 9561 [list desc "Box Drawings Up Double and Right Single" short "boxd_udrs"] + tcl::dict::set charinfo 9562 [list desc "Box Drawings Double Up and Right" short "boxd_dur"] + tcl::dict::set charinfo 9563 [list desc "Box Drawings Up Single and Left Double" short "boxd_usld"] + tcl::dict::set charinfo 9564 [list desc "Box Drawings Up Double and Left Single" short "boxd_udls"] + tcl::dict::set charinfo 9565 [list desc "Box Drawings Double Up and Left" short "boxd_dul"] + tcl::dict::set charinfo 9566 [list desc "Box Drawings Vertical Single and Right Double" short "boxd_vsrd"] + tcl::dict::set charinfo 9567 [list desc "Box Drawings Vertical Double and Right Single" short "boxd_vdrs"] + tcl::dict::set charinfo 9568 [list desc "Box Drawings Double Vertical and Right" short "boxd_dvr"] + tcl::dict::set charinfo 9569 [list desc "Box Drawings Vertical Single and Left Double" short "boxd_vsld"] + tcl::dict::set charinfo 9570 [list desc "Box Drawings Vertical Double and Left Single" short "boxd_vdls"] + tcl::dict::set charinfo 9571 [list desc "Box Drawings Double Vertical and Left" short "boxd_dvl"] + tcl::dict::set charinfo 9572 [list desc "Box Drawings Down Single and Horizontal Double" short "boxd_dshzd"] + tcl::dict::set charinfo 9573 [list desc "Box Drawings Down Double and Horizontal Single" short "boxd_ddhzs"] + tcl::dict::set charinfo 9574 [list desc "Box Drawings Double Down and Horizontal" short "boxd_ddhz"] + tcl::dict::set charinfo 9575 [list desc "Box Drawings Up Single and Horizontal Double" short "boxd_ushzd"] + tcl::dict::set charinfo 9576 [list desc "Box Drawings Up Double and Horizontal Single" short "boxd_udhzs"] + tcl::dict::set charinfo 9577 [list desc "Box Drawings Double Up and Horizontal" short "boxd_duhz"] + tcl::dict::set charinfo 9578 [list desc "Box Drawings Vertical Single and Horizontal Double" short "boxd_vshzd"] + tcl::dict::set charinfo 9579 [list desc "Box Drawings Vertical Double and Horizontal Single" short "boxd_vdhzs"] + tcl::dict::set charinfo 9580 [list desc "Box Drawings Double Vertical and Horizontal" short "boxd_dvhz"] + tcl::dict::set charinfo 9581 [list desc "Box Drawings Light Arc Down and Right" short "boxd_ladr"] + tcl::dict::set charinfo 9582 [list desc "Box Drawings Light Arc Down and Left" short "boxd_ladl"] + tcl::dict::set charinfo 9583 [list desc "Box Drawings Light Arc Up and Left" short "boxd_laul"] + tcl::dict::set charinfo 9584 [list desc "Box Drawings Light Arc Up and Right" short "boxd_laur"] + tcl::dict::set charinfo 9585 [list desc "Box Drawings Light Diagonal Upper Right To Lower Left" short "boxd_ldgurll"] + tcl::dict::set charinfo 9586 [list desc "Box Drawings Light Diagonal Upper Left To Lower Right" short "boxd_ldgullr"] + tcl::dict::set charinfo 9587 [list desc "Box Drawings Light Diagonal Cross" short "boxd_ldc"] + tcl::dict::set charinfo 9588 [list desc "Box Drawings Light Left" short "boxd_ll"] + tcl::dict::set charinfo 9589 [list desc "Box Drawings Light Up" short "boxd_lu"] + tcl::dict::set charinfo 9590 [list desc "Box Drawings Light Right" short "boxd_lr"] + tcl::dict::set charinfo 9591 [list desc "Box Drawings Light Down" short "boxd_ld"] + tcl::dict::set charinfo 9592 [list desc "Box Drawings Heavy Left" short "boxd_hl"] + tcl::dict::set charinfo 9593 [list desc "Box Drawings Heavy Up" short "boxd_hu"] + tcl::dict::set charinfo 9594 [list desc "Box Drawings Heavy Right" short "boxd_hr"] + tcl::dict::set charinfo 9595 [list desc "Box Drawings Heavy Down" short "boxd_hd"] + tcl::dict::set charinfo 9596 [list desc "Box Drawings Light Left and Heavy Right" short "boxd_llhr"] + tcl::dict::set charinfo 9597 [list desc "Box Drawings Light Up and Heavy Down" short "boxd_luhd"] + tcl::dict::set charinfo 9598 [list desc "Box Drawings Heavy Left and Light Right" short "boxd_hllr"] + tcl::dict::set charinfo 9599 [list desc "Box Drawings Heavy Up and Light Down" short "boxd_huld"] + + + tcl::dict::set charsets "Halfwidth and Fullwidth Forms" [list ranges [list {start 65280 end 65519}] description "Halfwidth and Fullwidth Forms (variants)" settype "tcl_supplemental"] + tcl::dict::set charsets "ascii_fullwidth" [list ranges [list {start 65281 end 65374}] description "Ascii 21 to 7E fullwidth" parentblock "halfwidth_and_fullwidth_forms" settype "other"] + + tcl::dict::set charsets "Specials" [list ranges [list {start 65520 end 65535}] description "Specials" settype "tcl_supplemental"] + + tcl::dict::set charsets "noncharacters" [list ranges [list\ + {start 64976 end 65007 note "BMP FDD0..FDEF"}\ + {start 65534 end 65535 note "BMP FFFE,FFFF"}\ + {start 131070 end 131071 note "plane1 1FFFE,1FFFF"}\ + {start 196606 end 196607 note "plane2 2FFFE,2FFFF"}\ + {start 262142 end 262143 note "plane3 3FFFE,3FFFF"}\ + {start 327678 end 327679 note "plane4 4FFFE,4FFFF"}\ + {start 393214 end 393215 note "plane5 5FFFE,5FFFF"}\ + {start 458750 end 458751 note "plane6 6FFFE,6FFFF"}\ + {start 524286 end 524287 note "plane7 7FFFE,7FFFF"}\ + {start 589822 end 589823 note "plane8 8FFFE,8FFFF"}\ + {start 655358 end 655359 note "plane9 9FFFE,9FFFF"}\ + {start 720894 end 720895 note "plane10 AFFFE,AFFFF"}\ + {start 786430 end 786431 note "plane11 BFFFE,BFFFF"}\ + {start 851966 end 851967 note "plane12 CFFFE,CFFFF"}\ + {start 917502 end 917503 note "plane13 DFFFE,DFFFF"}\ + {start 983038 end 983039 note "plane14 EFFFE,EFFFF"}\ + {start 1048574 end 1048575 note "plane15 FFFFE,FFFFF"}\ + {start 1114110 end 1114111 note "plane16 10FFFE,10FFFF"}\ + ] description "non-characters" settype "tcl_supplemental"] + + #build dicts keyed on short + variable charshort + proc _build_charshort {} { + variable charshort + set charshort [tcl::dict::create] + variable charinfo + tcl::dict::for {k v} $charinfo { + if {[tcl::dict::exists $v short]} { + set sh [tcl::dict::get $v short] + if {[tcl::dict::exists $charshort $sh]} { + puts stderr "_build_charshort WARNING character data load duplicate shortcode '$sh'" + } + tcl::dict::set charshort $sh [format %c $k] + } + } + return [tcl::dict::size $charshort] + } + _build_charshort + + variable charset_extents_startpoints ;#stores endpoints associated with each startpoint - but named after key which is startpoint. + variable charset_extents_endpoints ;#stores startpoints assoicated with each endpoint - but named after key which is endpoint. + variable charset_extents_rangenames ;# dict keyed on start,end pointing to list of 2-tuples {setname rangeindex_within_set} + #build 2 indexes for each range.(charsets are not just unicode blocks so can have multiple ranges) + #Note that a range could be as small as a single char (startpoint = endpoint) so there can be many ranges with same start and end if charsets use some of the same subsets. + #as each charset_extents_startpoins,charset_extents_endpoints is built - the associated range name and index is appended to the rangenames dict + #startpoints - key is startpoint of a range, value is list of endpoints one for each range starting at this startpoint-key + #endpoints - key is endpoint of a range, value is list of startpoints one for each range ending at this endpoint-key + proc _build_charset_extents {} { + variable charsets + variable charset_extents_startpoints + variable charset_extents_endpoints + variable charset_extents_rangenames + set charset_extents_startpoints [tcl::dict::create] + set charset_extents_endpoints [tcl::dict::create] + set charset_extents_rangenames [tcl::dict::create] + tcl::dict::for {setname setinfo} $charsets { + set ranges [tcl::dict::get $setinfo ranges] + if {[tcl::dict::get $setinfo settype] eq "block"} { + #unicode block must have a single range + #we consider a char a member of the block even if unassigned/reserved (as per unicode documentation) + set start [tcl::dict::get [lindex $ranges 0] start] + set end [tcl::dict::get [lindex $ranges 0] end] + if {![tcl::dict::exists $charset_extents_startpoints $start] || $end ni [tcl::dict::get $charset_extents_startpoints $start]} { + #assertion if end wasn't in startpoits list - then start won't be in endpoints list + tcl::dict::lappend charset_extents_startpoints $start $end + tcl::dict::lappend charset_extents_endpoints $end $start + } + tcl::dict::lappend charset_extents_rangenames ${start},${end} [list $setname 1] + } else { + #multirange sets/scripts. have holes. Char not a member if it's not explicitly in a defined range. + #They should be in order within a set - but we don't assume so + set r 1 + foreach range $ranges { + set start [tcl::dict::get $range start] + set end [tcl::dict::get $range end] + if {![tcl::dict::exists $charset_extents_startpoints $start] || $end ni [tcl::dict::get $charset_extents_startpoints $start]} { + #assertion if end wasn't in startpoits list - then start won't be in endpoints list + tcl::dict::lappend charset_extents_startpoints $start $end + tcl::dict::lappend charset_extents_endpoints $end $start + } + tcl::dict::lappend charset_extents_rangenames ${start},${end} [list $setname $r] + incr r + } + } + } + #maintain in sorted order + #-stride is available in lsort even at tcl8.6 - but not in lsearch + set charset_extents_startpoints [lsort -stride 2 -integer $charset_extents_startpoints] + set charset_extents_endpoints [lsort -stride 2 -integer $charset_extents_endpoints] + #no need to sort charset_extents_rangenames - lookup only done using dict methods + return [tcl::dict::size $charset_extents_startpoints] + } + _build_charset_extents ;#rebuilds for all charsets + + #nerdfonts are within the Private use E000 - F8FF range + proc load_nerdfonts {} { + variable charsets + variable charinfo + package require fileutil + set ver [package provide punk::char] + if {$ver ne ""} { + set ifneeded [package ifneeded punk::char [package provide punk::char]] + #puts stderr "punk::char ifneeded script: $ifneeded" + lassign [split $ifneeded ";"] _ sourceinfo + set basedir [file dirname [lindex $sourceinfo end]] + } else { + #review - will only work at package load time + set scr [info script] + if {$scr eq ""} { + error "load_nerdfonts unable to determine package folder" + } + set basedir [file dirname [info script]] + } + set pkg_data_dir [file join $basedir char] + set fname [file join $pkg_data_dir nerd-fonts-glyph-list.txt] + if {[file exists $fname]} { + #puts stderr "load_nerdfonts loading $fname" + set data [fileutil::cat -translation binary $fname] + set short_seen [tcl::dict::create] + set current_set_range [tcl::dict::create] + set filesets_loading [list] + foreach ln [split $data \n] { + set ln [tcl::string::trim $ln] + if {$ln eq ""} {continue} + set desc [lassign $ln hex rawsetname] + set hexnum 0x$hex + set dec [expr $hexnum] + set setname "nf_$rawsetname" ;#Ensure nerdfont set names are prefixed. + + if {$setname ni $filesets_loading} { + if {![tcl::dict::exists $charsets $setname]} { + #set exists - but not in our filesets_loading list - therefore this set has been previously loaded, so clear old data first + dict unset charset $setname + } + set newrange [list start $dec end $dec] + tcl::dict::set current_set_range $setname $newrange + tcl::dict::set charsets $setname [list ranges [list $newrange] description "nerd fonts $rawsetname" settype "nerdfonts privateuse"] + + lappend filesets_loading $setname + } + #expects ordered glyph list + set existing_range [tcl::dict::get $current_set_range $setname] + set existing_end [tcl::dict::get $existing_range end] + if {$dec - $existing_end == 1} { + #part of current range + tcl::dict::set current_set_range $setname end $dec + #overwrite last ranges element + set rangelist [lrange [tcl::dict::get $charsets $setname ranges] 0 end-1] + lappend rangelist [tcl::dict::get $current_set_range $setname] + tcl::dict::set charsets $setname ranges $rangelist + } else { + #new range for set + tcl::dict::set current_set_range $setname start $dec + tcl::dict::set current_set_range $setname end $dec + set rangelist [tcl::dict::get $charsets $setname ranges] + lappend rangelist [tcl::dict::get $current_set_range $setname] + tcl::dict::set charsets $setname ranges $rangelist + } + + if {![tcl::dict::exists $charinfo $dec]} { + # -- --- + #review + set map [list beaufort bf gibbous gb crescent cr thunderstorm tstorm thermometer thermom] + lappend map {*}[list directory dir creativecommons ccom creative_commons ccom forwardslash fs] + lappend map {*}[list multimedia mm multiple multi outline outl language lang] + lappend map {*}[list odnoklassniki okru] + # -- --- + #consider other ways to unambiguously shorten names? + #normalize nf_fa & nf_fa 'o' element to 'outl' so outlines can be searched across sets more easily (o not necessarily at last position) + set normdesc [list] + foreach el $desc { + if {$el eq "o"} { + set el "outl" + } + lappend normdesc $el + } + set joined_desc [join $normdesc _] + #map after join so we can normalize some underscored elements e.g creativecommons & creative_commons + set mapped_desc [tcl::string::map $map $joined_desc] + set s nf_${rawsetname}_$mapped_desc + + if {![tcl::dict::exists $short_seen $s]} { + tcl::dict::set short_seen $s {} + } else { + #duplicate in the data file (e.g 2023 weather night alt rain mix) + set s ${s}_$hex + } + tcl::dict::set charinfo $dec [list desc "$desc" short $s] + } + } + _build_charshort + _build_charset_extents + } else { + puts stderr "unable to find glyph file. Tried $fname" + } + } + + proc package_base {} { + #assume punk::char is in .tm form and we can use the package provide statement to determine base location + #review + set pkgver [package present punk::char] + set pkginfo [package ifneeded punk::char $pkgver] + set tmfile [lindex $pkginfo end] + set pkg_base [file dirname $tmfile] + return $pkg_base + } + tcl::namespace::eval internal { + proc unicode_folder {} { + set parent [file join [punk::char::package_base] char] + set candidates [glob -nocomplain -type d -dir $parent -tail unicode*] + set candidates [lsort -increasing $candidates] ;#review - dictionary sort - how are unicode versions ranked/compared?? + if {![llength $candidates]} { + error "Failed to find unicode data folder in folder '$parent'" + } + set folder [file join $parent [lindex $candidates end]] + return $folder + } + proc dict_getdef {dictValue args} { + if {[llength $args] < 2} { + error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + } + set keys [lrange $args 0 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + } + + + + #charsets structure + #tcl::dict::set charsets "halfwidth_and_fullwidth_forms" [list ranges [list {start 65280 end 65519}] description "Halfwidth and Fullwidth Forms (variants) settype block"] + + #unicode Blocks.txt + #load the defined blocks into 'charsets' and mark as type 'block'. Unicode blocks have only one range - and don't overlap. + #We don't treat unassigned/reserved codes within a block specially at this stage - ie we will not chop a block into subranges on that basis. + #unassigned code points should get certain default properties (e.g bidirectionality ) according to their block - so it makes sense to treat them as belonging to the block. + #They also get the general property of Cn (Other,not assigned or Other,reserved) and a "Basic Type" of Noncharacter or Reserved + proc load_unicode_blocks {} { + #sample data line + #0000..007F; Basic Latin + variable charsets + set file [file join [internal::unicode_folder] Blocks.txt] + if {![file exists $file]} { + error "Unicode Blocks.txt file not found at path '$file'" + } + puts "ok.. loading" + set fd [open $file r] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + set block_count 0 + foreach ln [split $data \n] { + set ln [tcl::string::trim $ln] + if {[tcl::string::match #* $ln]} { + continue + } + if {[set pcolon [tcl::string::first ";" $ln]] > 0} { + set lhs [tcl::string::trim [tcl::string::range $ln 0 $pcolon-1]] + set name [tcl::string::trim [tcl::string::range $ln $pcolon+1 end]] + set lhsparts [split $lhs .] + set start [lindex $lhsparts 0] + set end [lindex $lhsparts end] + #puts "$start -> $end '$name'" + set decimal_start [expr {"0x$start"}] + set decimal_end [expr {"0x$end"}] + tcl::dict::set charsets $name [list ranges [list [list start $decimal_start end $decimal_end note "unicode block $lhs"]] description "" settype block] + incr block_count + } + } + _build_charset_extents + return $block_count + } + + #unicode scripts + + #unicode UnicodeData.txt + + + + #https://www.unicode.org/reports/tr44/#Property_Values + + + #unicode EastAsianWidth.txt + #classify width of character - which is contextual in some cases + ##### + #Review - this is initial naive assumption that should get us mostly what we want for layout purposes in a utf-8-centric world. + #We will just load the values and treat H,N,Na as 1-wide and A,F,W as 2-wide for functions such as char::string_width on the basis that those using legacy sets can query the property and make their own determinations in those contexts. + #### + # -- --- + #A = Ambiguous - All characters that can be sometimes wide and sometimes narrow. (wide in east asian legacy sets, narrow in non-east asian usage) (private use chars considered ambiguous) + #F = East Asian Full-width + #H = East Asian Half-width + #N = Not east Asian (Neutral) - all other characters. (do not occur in legacy East Asian character sets) - treated like Na + #Na = East Asian Narrow - all other characters that are always narrow and have explicit full-width counterparts (e.g includes all of ASCII) + #W = East Asian Wide - all other characters that are always wide (Occur only in the context of Eas Asian Typography) + # -- --- + + + + + proc charshort {shortname} { + variable charshort + return [tcl::dict::get $charshort $shortname] + } + + proc box_drawing {args} { + return [charset "Box Drawing" {*}$args] + } + proc box_drawing_dict {} { + return [charset_dict "Box Drawing"] + } + + proc char_info_hex {hex args} { + set hex [tcl::string::map [list _ ""] $hex] + if {[tcl::string::is xdigit -strict $hex]} { + #has no leading 0x + set dec [expr {"0x$hex"}] + } else { + set dec [expr {$hex}] + } + return [char_info_dec $dec {*}$args] + } + proc char_info {char args} { + #Note - on some versions of Tcl -e.g 8.6 use could supply something like \U1f600 (smiley icon) but we receive fffd (replacement special) + #there is no way to detect what the user intended ie we can't distinguish if they actually typed \UFFFD + #we can test if such mapping happens in general - and warn if codepoint is FFFD in the result dict + set returninfo [tcl::dict::create] + if {[tcl::string::equal \UFFFD $char] && [tcl::string::equal \U1F600 \UFFFD]} { + tcl::dict::set returninfo WARNING "this tcl maps multiple to FFFD" + } + lassign [scan $char %c%s] dec_char remainder + if {[tcl::string::length $remainder]} { + error "char_info requires a single character" + } + set result [tcl::dict::merge $returninfo [char_info_dec $dec_char {*}$args]] + } + proc char_info_dec {dec args} { + set dec_char [expr {$dec}] + set opts [tcl::dict::create\ + -fields {default}\ + -except {}\ + ] + #testwidth is so named because it peforms an actual test on the console using ansi escapes - and the name gives a hint that it is a little slow + set known_fields [list all default dec hex desc short testwidth char memberof] ;#maint fields from charinfo 'desc' 'short' + #todo - unicode properties + # tclwhitespace (different to unicode concept of whitespace. review ) + + foreach {k v} $args { + switch -- $k { + -fields - -except { + tcl::dict::set opts $k $v + } + default { + error "char_info unrecognised option '$k'. Known options:'[tcl::dict::keys $opts]' known_fields: $known_fields usage: char_info ?-fields {}? ?-except {}?" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_fields [tcl::dict::get $opts -fields] + set opt_except [tcl::dict::get $opts -except] + # -- --- --- --- --- --- --- --- --- --- --- --- + set initial_fields [list] + if {"default" in $opt_fields} { + set initial_fields $known_fields + if {"testwidth" ni $opt_fields} { + if {"testwidth" ni $opt_except} { + lappend opt_except testwidth + } + } + if {"char" ni $opt_fields} { + if {"char" ni $opt_except} { + lappend opt_except char + } + } + } elseif {"all" in $opt_fields} { + set initial_fields $known_fields + } else { + foreach f $opt_fields { + if {$f in $known_fields} { + lappend initial_fields $f + } else { + error "char_info unknown field name: '$f' known fields: '$known_fields'" + } + } + } + foreach e $opt_except { + if {$e ni $known_fields} { + error "char_info unknown field name $e in -except. known fields: '$known_fields'" + } + } + set fields [list] + foreach f $initial_fields { + if {$f ne "all" && $f ni $opt_except} { + lappend fields $f + } + } + if {![llength $fields]} { + return + } + + variable charinfo + variable charsets + set hex_char [format %04x $dec_char] + set returninfo [tcl::dict::create] + foreach f $fields { + switch -- $f { + dec { + tcl::dict::set returninfo dec $dec_char + } + hex { + tcl::dict::set returninfo hex $hex_char + } + desc { + if {[tcl::dict::exists $charinfo $dec_char desc]} { + tcl::dict::set returninfo desc [tcl::dict::get $charinfo $dec_char desc] + } else { + tcl::dict::set returninfo desc "" + } + } + short { + if {[tcl::dict::exists $charinfo $dec_char short]} { + tcl::dict::set returninfo desc [tcl::dict::get $charinfo $dec_char short] + } else { + tcl::dict::set returninfo short "" + } + } + testwidth { + #todo - expectedwidth - lookup the printing width it is *supposed* to have from unicode tables + #testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time + set existing_testwidth "" + if {[tcl::dict::exists $charinfo $dec_char testwidth]} { + set existing_testwidth [tcl::dict::get $charinfo $dec_char testwidth] + } + if {$existing_testwidth eq ""} { + #no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.) + set char [format %c $dec_char] + set chwidth [char_info_testwidth $char] + + tcl::dict::set returninfo testwidth $chwidth + #cache it. todo - -verify flag to force recalc in case font/terminal changed in some way? + tcl::dict::set charinfo $dec_char testwidth $chwidth + } else { + tcl::dict::set returninfo testwidth $existing_testwidth + } + } + char { + set char [format %c $dec_char] + tcl::dict::set returninfo char $char + } + memberof { + #memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising + #note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4) + #This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges. + #We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here. + #some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char) + set memberof [list] + tcl::dict::for {setname setinfo} $charsets { + foreach r [tcl::dict::get $setinfo ranges] { + set s [tcl::dict::get $r start] + set e [tcl::dict::get $r end] + if {$dec_char >= $s && $dec_char <= $e} { + lappend memberof $setname + break + } + } + } + tcl::dict::set returninfo memberof $memberof + } + } + } + + return $returninfo + } + + proc _char_info_dec_memberof_scan {dec} { + variable charsets + set memberof [list] + tcl::dict::for {setname setinfo} $charsets { + foreach r [tcl::dict::get $setinfo ranges] { + set s [tcl::dict::get $r start] + set e [tcl::dict::get $r end] + if {$dec >= $s && $dec <= $e} { + lappend memberof $setname + break + } + } + } + return $memberof + } + proc range_split_info {dec} { + variable charset_extents_startpoints + variable charset_extents_endpoints + set skeys [tcl::dict::keys $charset_extents_startpoints] + set ekeys [tcl::dict::keys $charset_extents_endpoints] + set splen [tcl::dict::size $charset_extents_startpoints] + set eplen [tcl::dict::size $charset_extents_endpoints] + set s [lsearch -bisect -integer $skeys $dec] + set s_at_or_below [lrange $skeys 0 $s] + set e_of_s [list] + foreach sk $s_at_or_below { + lappend e_of_s {*}[tcl::dict::get $charset_extents_startpoints $sk] + } + set e_of_s [lsort -integer $e_of_s] + set splitposn [lsearch -bisect -integer $e_of_s $dec] + if {[lindex $e_of_s $splitposn] < $dec} {incr splitposn} + #set lhs_endpoints_to_check [expr {[llength $e_of_s] - $splitposn}] + set reduced_endpoints [lrange $e_of_s $splitposn end] + set sps [list] + foreach ep $reduced_endpoints { + lappend sps {*}[tcl::dict::get $charset_extents_endpoints $ep] + } + + + + set e [lsearch -bisect -integer $ekeys $dec] + if {$e >= 0} { + set e_at_or_above [lrange $ekeys $e end] + set s_of_e [list] + foreach ek $e_at_or_above { + lappend s_of_e {*}[tcl::dict::get $charset_extents_endpoints $ek] + } + set startpoints_of_above [llength $s_of_e] + set splitposn [lsearch -bisect -integer $s_of_e $dec] + set reduced_startpoints [lrange $s_of_e 0 $splitposn] + set eps [list] + foreach sp $reduced_startpoints { + lappend eps {*}[tcl::dict::get $charset_extents_startpoints $sp] + } + } else { + set s_of_e [list] + set reduced_startpoints [list] + set eps [list] + + } + + return [tcl::dict::create startpoints $splen endpoints $eplen midpoint [expr {floor($eplen/2)}] posn $e lhs_endpoints_to_check "[llength $reduced_endpoints]/[llength $e_of_s]=[llength $sps]ranges" rhs_startpoints_to_check "[llength $reduced_startpoints]/[llength $s_of_e]=[llength $eps]"] + } + #for just a few extra sets such as wgl4 and unicode blocks loaded - this gives e.g 5x + better performance than the simple search above, and up to twice as slow for tcl 8.6 + #performance biased towards lower numbered characters (which is not too bad in the context of unicode) + #todo - could be tuned to perform better at end by assuming a fairly even distribution of ranges - and so searching startpoint ranges first for items left of middle, endpoint ranges first for items right of middle + #review with scripts loaded and more defined ranges.. + #This algorithm supports arbitrary overlapping ranges and ranges with same start & endpoints + #Should be much better than O(n) for n sets except for pathological case of member of all or nearly-all intervals ? + #review - compare with 'interval tree' algorithms. + proc char_info_dec_memberof {dec} { + variable charset_extents_startpoints + variable charset_extents_endpoints + variable charset_extents_rangenames + if {[package vcompare [info tclversion] 8.7a5] >= 0} { + #algorithm should theoretically be a little better with -stride + set last_smaller_or_equal_startposn [lsearch -stride 2 -bisect -integer $charset_extents_startpoints $dec] + set sets_starting_below [lrange $charset_extents_startpoints 0 $last_smaller_or_equal_startposn+1] ;#+1 to include 2nd element of stridden pair + set endpoints_of_starting_below [lsort -integer [concat {*}[tcl::dict::values $sets_starting_below]]] + } else { + #no -stride available + set startkeys [tcl::dict::keys $charset_extents_startpoints] + set last_smaller_or_equal_startkeyposn [lsearch -bisect -integer $startkeys $dec] ;#assert will always return one of the keys if number >=0 supplied (last key if > all) + #set startkey_found [lindex $startkeys $last_smaller_or_equal_startkeyposn] + set start_below_keys [lrange $startkeys 0 $last_smaller_or_equal_startkeyposn] ;#These are the keys of sets which start at or below dec + #puts "start_below_keys: '$start_below_keys'" + set endpoints_of_starting_below [list] + foreach belowkey $start_below_keys { + lappend endpoints_of_starting_below {*}[tcl::dict::get $charset_extents_startpoints $belowkey] + } + set endpoints_of_starting_below [lsort -integer $endpoints_of_starting_below[unset endpoints_of_starting_below]] + } + + set splitposn [lsearch -bisect -integer $endpoints_of_starting_below $dec] ;#splitposn = last smaller or equal endposn + if {[lindex $endpoints_of_starting_below $splitposn] < $dec} { incr splitposn} + set reduced_opposite_limit [lrange $endpoints_of_starting_below $splitposn end] + ################ + #note each endpoint points to multiple startpoints which may still include some that are not in range. (e.g range y can share endpoint with x that starts in-range - but y starts above character ) + # x1 x2 + # y1 y2 + # c + ################ + #we have reduced our set of endpoints sufficiently (to those at or above dec) to run through and test each startpoint + set ranges [list] + foreach ep $reduced_opposite_limit { + foreach s [tcl::dict::get $charset_extents_endpoints $ep] { + if {$s <= $dec} { + lappend ranges [tcl::dict::get $charset_extents_rangenames $s,$ep] + } + } + } + return $ranges + + } + + + #with glob searching of description and short + proc char_range_dict {start end args} { + if {![tcl::string::is integer -strict $start] || ![tcl::string::is integer -strict $end]} { + error "char_range_dict error start and end must be integers" + } + set and_globs [list] + if {![llength $args]} { + set args [list *] + } + foreach glob $args { + if {![regexp {[*?]} $glob]} { + lappend and_globs "*$glob*" + } else { + lappend and_globs $glob + } + } + variable charinfo + set cdict [tcl::dict::create] + set start [expr {$start}] ;#force string rep to decimal - otherwise first use of i as string could be hex or other rep whilst other i values will be decimal string rep due to incr + for {set i $start} {$i <= $end} {incr i} { + set hx [format %04x $i] + set ch [format %c $i] + if {[tcl::dict::exists $charinfo $i desc]} { + set d [tcl::dict::get $charinfo $i desc] + } else { + set d "" + } + if {[tcl::dict::exists $charinfo $i short]} { + set s [tcl::dict::get $charinfo $i short] + } else { + set s "" + } + set matchcount 0 + foreach glob $and_globs { + if {[tcl::string::match -nocase $glob $s] || [tcl::string::match -nocase $glob $d]} { + incr matchcount + } + } + if {$matchcount == [llength $and_globs]} { + if {[tcl::dict::exists $charinfo $i]} { + tcl::dict::set cdict $hx [tcl::dict::merge [tcl::dict::create dec $i hex $hx char $ch] [tcl::dict::get $charinfo $i]] + } else { + tcl::dict::set cdict $hx [list dec $i hex $hx char $ch desc $d short $s] + } + } + } + return $cdict + } + #with glob searches of desc and short + proc char_range {start end args} { + package require overtype + if {![tcl::string::is integer -strict $start] || ![tcl::string::is integer -strict $end]} { + error "char_range error start and end must be integers" + } + set charset_dict [char_range_dict $start $end {*}$args] + set out "" + set col3 [tcl::string::repeat " " 12] + tcl::dict::for {k inf} $charset_dict { + set s [internal::dict_getdef $inf short ""] + set d [internal::dict_getdef $inf desc ""] + set s_col [overtype::left $col3 $s] + append out "$k [tcl::dict::get $inf dec] [tcl::dict::get $inf char] $s_col $d" \n + } + return $out + } + + + #non-overlapping unicode blocks + proc char_blocks {{name_or_glob *}} { + variable charsets + #todo - more efficient datastructures? + if {![regexp {[?*]} $name_or_glob]} { + #no glob - just retrieve it + if {[tcl::dict::exists $charsets $name_or_glob]} { + if {[tcl::dict::get $charsets $name_or_glob settype] eq "block"} { + return [tcl::dict::create $name_or_glob [tcl::dict::get $charsets $name_or_glob]] + } + } + #no exact match - try case insensitive.. + set name [lsearch -inline -nocase [tcl::dict::keys $charsets] $name_or_glob] + if {$name ne ""} { + if {[tcl::dict::get $charsets $name settype] eq "block"} { + return [tcl::dict::create $name [tcl::dict::get $charsets $name]] + } + } + + } else { + #build a subset + set charsets_block [tcl::dict::create] + tcl::dict::for {k v} $charsets { + if {[tcl::string::match -nocase $name_or_glob $k]} { + if {[tcl::dict::get $v settype] eq "block"} { + tcl::dict::set charsets_block $k $v + } + } + } + return $charsets_block + } + } + proc charset_names {{name_or_glob *}} { + variable charsets + if {![regexp {[?*]} $name_or_glob]} { + #no glob - just retrieve it + if {[tcl::dict::exists $charsets $name_or_glob]} { + return [list $name_or_glob] + } + #no exact match - try case insensitive.. + set name [lsearch -inline -nocase [tcl::dict::keys $charsets] $name_or_glob] + if {$name ne ""} { + return [list $name] + } + } else { + if {$name_or_glob eq "*"} { + return [lsort [tcl::dict::keys $charsets]] + } + #tcl::dict::keys $dict doesn't have option for case insensitive searches + return [lsort [lsearch -all -inline -nocase [tcl::dict::keys $charsets] $name_or_glob]] + } + } + + #deprecated + #major named sets such as unicode blocks, scripts, and other sets such as microsoft WGL4 + #case insensitive search - possibly with *basic* globs (? and * only - not lb rb) + proc charset_names2 {{namesearch *}} { + variable charsets + #dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results + #set sortedkeys [lsort -increasing -dictionary [tcl::dict::keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below + set sortedkeys [lsort -increasing [tcl::dict::keys $charsets]] + if {$namesearch eq "*"} { + return $sortedkeys + } + if {[regexp {[?*]} $namesearch]} { + #name glob search + return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used + } else { + #return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted? + return [lsearch -inline -nocase $sortedkeys $namesearch] ;#no globs + } + } + proc charsets {{namesearch *}} { + package require textblock + variable charsets + set charset_names [charset_names $namesearch] + set settype_list [list] + foreach setname $charset_names { + lappend settype_list [tcl::dict::get $charsets $setname settype] + } + + set charset_names [linsert $charset_names 0 "Set Name"] + set settype_list [linsert $settype_list 0 "Set Type"] + + return [textblock::join -- [list_as_lines -- $charset_names] " " [list_as_lines $settype_list]] + } + proc charset_defget {exactname} { + variable charsets + return [tcl::dict::get $charsets $exactname] + } + proc charset_defs {charsetname} { + variable charsets + set matches [charset_names $charsetname] + set def_list [list] + foreach setname $matches { + lappend def_list [tcl::dict::create $setname [tcl::dict::get $charsets $setname]] + } + return [join $def_list \n] + } + proc charset_dictget {exactname} { + variable charsets + set setinfo [tcl::dict::get $charsets $exactname] + set ranges [tcl::dict::get $setinfo ranges] + set charset_dict [tcl::dict::create] + foreach r $ranges { + set start [tcl::dict::get $r start] + set end [tcl::dict::get $r end] + set charset_dict [tcl::dict::merge $charset_dict [char_range_dict $start $end]] + } + return $charset_dict + } + proc charset_dicts {searchname} { + variable charsets + set matches [charset_names $searchname] + if {![llength $matches]} { + error "No charset found matching name '$searchname' - use 'charset_names' to get list" + } + set dict_list [list] + foreach m $matches { + lappend dict_list [tcl::dict::create $m [charset_dictget $m]] + } + #return $dict_list + return [join $dict_list \n] + } + proc charset_page {namesearch args} { + _charset_page_search $namesearch $args ;#pass args to descsearch argument + } + proc _charset_page_search {namesearch search_this_and_that args} { + variable charsets + variable charinfo + set matched_names [charset_names $namesearch] + if {![llength $matched_names]} { + error "charset_page no charset matched pattern '$namesearch' - use 'charset_names' to get list" + } + set defaults [tcl::dict::create\ + -ansi 0\ + -lined 1\ + ] + set opts [tcl::dict::merge $defaults $args] + # -- --- --- --- + set opt_ansi [tcl::dict::get $opts -ansi] + set opt_lined [tcl::dict::get $opts -lined] + # -- --- --- --- + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + + if {$opt_ansi} { + set a1 [a BLACK white bold] + set a2 [a] + } else { + set a1 "" + set a2 "" + } + set cols 16 + set prefix " " + append out $prefix + foreach charsetname $matched_names { + if {[llength $search_this_and_that]} { + set setinfo [tcl::dict::get $charsets $charsetname] + set ranges [tcl::dict::get $setinfo ranges] + set charset_dict [tcl::dict::create] + foreach r $ranges { + set start [tcl::dict::get $r start] + set end [tcl::dict::get $r end] + set charset_dict [tcl::dict::merge $charset_dict [char_range_dict $start $end {*}$search_this_and_that]] + } + } else { + set charset_dict [charset_dictget $charsetname] + } + if {![tcl::dict::size $charset_dict]} { + continue + } + set i 1 + append out \n $prefix $charsetname + append out \n + + set marker_line $prefix + set line $prefix + tcl::dict::for {hex inf} $charset_dict { + set ch [tcl::dict::get $inf char] + set twidth "" + set dec [expr {"0x$hex"}] + if {[tcl::dict::exists $charinfo $dec testwidth]} { + set twidth [tcl::dict::get $charinfo $dec testwidth] + } + if {$twidth eq ""} { + #set width [ansifreestring_width $ch] ;#based on unicode props + set width [grapheme_width_cached $ch] + } else { + set width $twidth + } + if {$width == 0} { + set marker " " + if {[regexp $re_diacritics $ch]} { + #attempt to combine with space to get 3-wide displayv with diacritic showing at left space + #todo - dualchar diacritics? + set displayv " $ch " + } else { + set displayv " " + } + } elseif {$width == 1} { + set marker "_ " + set displayv "${a1}$ch${a2} " + } else { + #presumed 2 + set marker "__ " + set displayv "${a1}$ch${a2} " + } + set hexlen [tcl::string::length $hex] + append marker_line "[tcl::string::repeat " " $hexlen] $marker" + append line "$hex $displayv" + if {$i == [tcl::dict::size $charset_dict] || $i % $cols == 0} { + if {$opt_lined} { + append out $marker_line \n + } + append out $line \n + set marker_line $prefix + set line $prefix + #set out [tcl::string::range $out 0 end-2] + #append out \n " " + } + incr i + } + } + set out [tcl::string::trimright $out " "] + return $out + } + + #allows search on both name and an anded list of globs to be applied to description & short + proc charset {namesearch args} { + package require overtype + variable charsets + set matched_names [charset_names $namesearch] + if {![llength $matched_names]} { + error "No charset matched pattern '$namesearch' - use 'charset_names' to get list" + } + set search_this_and_that $args + + set out "" + + foreach charsetname $matched_names { + if {[llength $search_this_and_that]} { + set setinfo [tcl::dict::get $charsets $charsetname] + set ranges [tcl::dict::get $setinfo ranges] + set charset_dict [tcl::dict::create] + foreach r $ranges { + set start [tcl::dict::get $r start] + set end [tcl::dict::get $r end] + set charset_dict [tcl::dict::merge $charset_dict [char_range_dict $start $end {*}$search_this_and_that]] + } + } else { + set charset_dict [charset_dictget $charsetname] + } + + set col_items_short [list] + set col_items_desc [list] + tcl::dict::for {k inf} $charset_dict { + lappend col_items_desc [internal::dict_getdef $inf desc ""] + lappend col_items_short [internal::dict_getdef $inf short ""] + } + if {[llength $col_items_desc]} { + set widest3 [tcl::mathfunc::max {*}[lmap v $col_items_short {tcl::string::length $v}]] + if {$widest3 == 0} { + set col3 " " + } else { + set col3 [tcl::string::repeat " " $widest3] + } + tcl::dict::for {k inf} $charset_dict { + set s [internal::dict_getdef $inf short ""] + set d [internal::dict_getdef $inf desc ""] + set s_col [overtype::left $col3 $s] + append out "$k [tcl::dict::get $inf char] $s_col $d" \n + } + } + } + + return $out + } + + #use console cursor movements to test and cache the column-width of each char in the set of characters returned by the search criteria + proc charset_calibrate {namesearch args} { + variable charsets + variable charinfo + set matched_names [charset_names $namesearch] + if {![llength $matched_names]} { + error "No charset matched pattern '$namesearch' - use 'charset_names' to get list" + } + set search_this_and_that $args + set charcount 0 + set width_results [tcl::dict::create] + puts stdout "calibrating using terminal cursor movements.." + foreach charsetname $matched_names { + if {[llength $search_this_and_that]} { + set setinfo [tcl::dict::get $charsets $charsetname] + set ranges [tcl::dict::get $setinfo ranges] + set charset_dict [tcl::dict::create] + foreach r $ranges { + set start [tcl::dict::get $r start] + set end [tcl::dict::get $r end] + set charset_dict [tcl::dict::merge $charset_dict [char_range_dict $start $end {*}$search_this_and_that]] + } + } else { + set charset_dict [charset_dictget $charsetname] + } + if {![tcl::dict::size $charset_dict]} { + continue + } + tcl::dict::for {hex inf} $charset_dict { + set ch [format %c 0x$hex] + set twidth "" + set dec [expr {"0x$hex"}] + if {[tcl::dict::exists $charinfo $dec testwidth]} { + set twidth [tcl::dict::get $charinfo $dec testwidth] + } + if {$twidth eq ""} { + #puts -nonewline stdout "." ;#this + set width [char_info_testwidth $ch] ;#based on console test rather than unicode props + tcl::dict::set charinfo $dec testwidth $width + } else { + set width $twidth + } + tcl::dict::incr width_results $width + incr charcount + } + } + puts stdout "\ncalibration done - results cached in charinfo dictionary" + return [tcl::dict::create charcount $charcount widths $width_results] + } + + #maint warning - also in overtype! + #intended for single grapheme - but will work for multiple + #cannot contain ansi or newlines + #(a cache of ansifreestring_width calls - as these are quite regex heavy) + #review - effective memory leak on longrunning programs if never cleared + #tradeoff in fragmenting cache and reducing efficiency vs ability to clear in a scoped manner + proc grapheme_width_cached {ch {key ""}} { + variable grapheme_widths + #if key eq "*" - we won't be able to clear that cache individually. Perhaps that's ok + if {[tcl::dict::exists $grapheme_widths $key $ch]} { + return [tcl::dict::get $grapheme_widths $key $ch] + } + set width [punk::char::ansifreestring_width $ch] ;#review - can we provide faster version if we know it's a single grapheme rather than a string? (grapheme is still a string as it may have combiners/diacritics) + tcl::dict::set grapheme_widths $key $ch $width + return $width + } + proc grapheme_width_cache_clear {key} { + variable grapheme_widths + if {$key eq "*} { + set grapheme_widths [tcl::dict::create] + } else { + tcl::dict::unset grapheme_widths $key + } + return + } + #no char_width - use grapheme_width terminology to be clearer + proc grapheme_width {char} { + error "grapheme_width unimplemented - use ansifreestring_width" + } + + #return N Na W etc from unicode data + #review + proc char_uc_width_prop {char} { + error "char_uc_width unimplemented try textutil::wcswidth_type" + } + #todo - provide a grapheme_width function that is optimised for speed + proc string_width {text} { + #burn approx 2uS (2024) checking for ansi codes - not just SGR + if {[punk::ansi::ta::detect $text]} { + puts stderr "string_width detected ANSI!" + } + if {[tcl::string::last \n $text] >= 0} { + error "string_width accepts only a single line" + } + tailcall ansifreestring_width $text + } + + #faster than textutil::wcswidth (at least for string up to a few K in length) + proc wcswidth {string} { + set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]] + set width 0 + foreach c $codes { + if {$c <= 255} { + incr width + } else { + set w [textutil::wcswidth_char $c] + if {$w < 0} { + return -1 + } else { + incr width $w + } + } + } + return $width + } + #faster than textutil::wcswidth (at least for string up to a few K in length) + proc wcswidth1 {string} { + set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]] + set width 0 + foreach c $codes { + set w [textutil::wcswidth_char $c] + if {$w < 0} { + return -1 + } else { + incr width $w + } + } + return $width + } + proc wcswidth2 {string} { + set codes [scan $string [tcl::string::repeat %c [tcl::string::length $string]]] + set widths [lmap c $codes {textutil::wcswidth_char $c}] + if {-1 in $widths} { + return -1 + } + return [tcl::mathop::+ {*}$widths] + } + + #prerequisites - no ansi escapes - no newlines - utf8 encoding assumed + #review - what about \r \t \b ? + #NO processing of \b - already handled in ansi::printing_length which then calls this + #this version breaks string into sequences of ascii vs unicode + proc ansifreestring_width {text} { + #caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines + #we can c0 control characters after or while processing ansi escapes. + #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) + #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error + #if {[tcl::string::first \033 $text] >= 0} { + # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" + #} + + + #todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc + #as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence. + + + #- {Combining Diacritical Marks} {ranges {{start 768 end 879 note {unicode block 0300..036F}}} description {} settype block} + #- {Combining Diacritical Marks Extended} {ranges {{start 6832 end 6911 note {unicode block 1AB0..1AFF}}} description {} settype block} + #- {Combining Diacritical Marks Supplement} {ranges {{start 7616 end 7679 note {unicode block 1DC0..1DFF}}} description {} settype block} + #- {Combining Diacritical Marks for Symbols} {ranges {{start 8400 end 8447 note {unicode block 20D0..20FF}}} description {} settype block} + #- {Combining Half Marks} {ranges {{start 65056 end 65071 note {unicode block FE20..FE2F}}} description {} settype block} + # + # initial simplistic approach is just to strip these ... todo REVIEW + + #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway + #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) + #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} + #if {[regexp $re_leading_diacritic $text]} { + # set text " $text" + #} + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + set text [regsub -all $re_diacritics $text ""] + + # -- --- --- --- --- --- --- + #review + #if we strip out ZWJ \u200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis + #as at 2024 - textutil::wcswidth just uses the unicode east-asian width property data and doesn't seem to handle these specially - it counts this joiner and others as one wide (also BOM \uFFEF) + + #TODO - once we have proper grapheme cluster splitting - work out which of these characters should be left in and/or when exactly their length-effects apply + # + #for now - strip them out + + #ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length + #ZWSP \u200b zero width space + + #\uFFEFBOM/ ZWNBSP and others that should be zero width + #todo - work out proper way to mark/group zero width. + + #set text [tcl::string::map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text] + set text [tcl::string::map [list \u200b "" \u200c "" \u200d ""] $text] + #\uFFEF tends to print as 1 length replacement char - REVIEW + #\uFFFF varies between terminals - some print replacement char (width 1) some print nothing (width 0) + # -- --- --- --- --- --- --- + + #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f + #todo - document that these shouldn't be present in input rather than explicitly checking here + + #c0 controls + set re_ascii_c0 {[\U0000-\U001F]} + set text [regsub -all $re_ascii_c0 $text ""] + + #c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective + #some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all + #we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here + #they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function) + set text [regsub -all {[\u0080-\u009f]+} $text ""] + + + #short-circuit basic cases + #support tcl pre 2023-11 - see regexp bug below + #if {![regexp {[\uFF-\U10FFFF]} $text]} { + # return [tcl::string::length $text] + #} + if {![regexp "\[\uFF-\U10FFFF\]" $text]} { + return [tcl::string::length $text] + } + + #split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first? + + #review + #set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525] + #tcl pre 2023-11 - braced high unicode regexes don't work + #fixed in bug-4ed788c618 2023-11 + #set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only + + #maintain unicode as sequences - todo - scan for grapheme clusters + #set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0100-\U10FFFF\])+" $text] + set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0000-\u00FF\])+" $text] + set len 0 + foreach {uc ascii} $uc_sequences { + #puts "-ascii $ascii" + #puts "-uc $uc" + incr len [tcl::string::length $ascii] + #textutil::wcswidth uses unicode data + #fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for) + #todo - find something that understands grapheme clusters - needed also for grapheme_split + #use punk::char::wcswidth - faster than the string split in textutil::wcswidth but still uses textutil::wcswidth_char + incr len [wcswidth $uc] + } + #todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies. + return $len + } + + #kept as a fallback for review/test if textutil::wcswidth doesn't do what we require on all terminals. + #this version looks at console testwidths if they've been cached. + #It is relatively fast - but tests unicode widths char by char - so won't be useful going forward for grapheme clusters. + proc ansifreestring_width2 {text} { + #caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines + + + + #we can c0 control characters after or while processing ansi escapes. + #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) + #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error + #if {[tcl::string::first \033 $text] >= 0} { + # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" + #} + + + #todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc + #as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence. + + + #- {Combining Diacritical Marks} {ranges {{start 768 end 879 note {unicode block 0300..036F}}} description {} settype block} + #- {Combining Diacritical Marks Extended} {ranges {{start 6832 end 6911 note {unicode block 1AB0..1AFF}}} description {} settype block} + #- {Combining Diacritical Marks Supplement} {ranges {{start 7616 end 7679 note {unicode block 1DC0..1DFF}}} description {} settype block} + #- {Combining Diacritical Marks for Symbols} {ranges {{start 8400 end 8447 note {unicode block 20D0..20FF}}} description {} settype block} + #- {Combining Half Marks} {ranges {{start 65056 end 65071 note {unicode block FE20..FE2F}}} description {} settype block} + # + # initial simplistic approach is just to strip these ... todo REVIEW + + #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway + #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) + #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} + #if {[regexp $re_leading_diacritic $text]} { + # set text " $text" + #} + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + set text [regsub -all $re_diacritics $text ""] + + #review + #if we strip out ZWJ \u0200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis + #as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide + + #ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length + + #ZWSP \u0200b zero width space + + + #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f + #todo - document that these shouldn't be present in input rather than explicitly checking here + set re_ascii_c0 {[\U0000-\U001F]} + set text [regsub -all $re_ascii_c0 $text ""] + + #short-circuit basic cases + #support tcl pre 2023-11 - see regexp bug below + #if {![regexp {[\uFF-\U10FFFF]} $text]} { + # return [tcl::string::length $text] + #} + if {![regexp "\[\uFF-\U10FFFF\]" $text]} { + return [tcl::string::length $text] + } + + #review - wcswidth should detect these + set re_ascii_fullwidth {[\uFF01-\uFF5e]} + + set doublewidth_char_count 0 + set zerowidth_char_count 0 + #split just to get the standalone character widths - and then scan for other combiners (?) + #review + #set can_regex_high_unicode [tcl::string::match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525] + #tcl pre 2023-11 - braced high unicode regexes don't work + #fixed in bug-4ed788c618 2023-11 + #set uc_sequences [regexp -all -inline -indices {[\u0100-\U10FFFF]} $text] + #set uc_sequences [regexp -all -inline -indices "\[\u0100-\U10FFFF\]" $text] ;#e.g for string: \U0001f9d1abc\U001f525ab returns {0 0} {4 4} + set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only + foreach c $uc_chars { + if {[regexp $re_ascii_fullwidth $c]} { + incr doublewidth_char_count + } else { + #review + # a)- terminals lie - so we could have a bad cached testwidth + # b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs. + #(character width is a complex context-dependent topic) + # c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run. + # d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here + #Despite all this - the mechanism is hoped to give best effort consistency for terminals + #further work needed for combining emojis etc - which can't be done in a per character loop + #todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split + # to process sequences of unicode. + #- and the user has the option to test character sets first if terminal position reporting gives better results + if {[char_info_is_testwidth_cached $c]} { + set width [char_info_testwidth_cached $c] + } else { + #textutil::wcswidth uses unicode data + #fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for) + set width [textutil::wcswidth_char [scan $c %c]] + } + if {$width == 0} { + incr zerowidth_char_count + } elseif {$width == 2} { + incr doublewidth_char_count + } + } + } + #todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies. + return [expr {[tcl::string::length $text] + $doublewidth_char_count - $zerowidth_char_count}] + } + + #slow - textutil::wcswidth is slow with mixed ascii uc + proc ansifreestring_width3 {text} { + #caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines + + + + #we can c0 control characters after or while processing ansi escapes. + #we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!) + #anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error + #if {[tcl::string::first \033 $text] >= 0} { + # error "string_width doesn't accept ansi escape sequences. Use punk::ansi::ansistrip first" + #} + + + #review - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies. + # - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc + #as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence. + + #- {Combining Diacritical Marks} {ranges {{start 768 end 879 note {unicode block 0300..036F}}} description {} settype block} + #- {Combining Diacritical Marks Extended} {ranges {{start 6832 end 6911 note {unicode block 1AB0..1AFF}}} description {} settype block} + #- {Combining Diacritical Marks Supplement} {ranges {{start 7616 end 7679 note {unicode block 1DC0..1DFF}}} description {} settype block} + #- {Combining Diacritical Marks for Symbols} {ranges {{start 8400 end 8447 note {unicode block 20D0..20FF}}} description {} settype block} + #- {Combining Half Marks} {ranges {{start 65056 end 65071 note {unicode block FE20..FE2F}}} description {} settype block} + # + # initial simplistic approach is just to strip these ... todo REVIEW + + #experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway + #(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then) + #set re_leading_diacritic {^(?:[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+)} + #if {[regexp $re_leading_diacritic $text]} { + # set text " $text" + #} + set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + set text [regsub -all $re_diacritics $text ""] + + #we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f + #todo - document that these shouldn't be present in input rather than explicitly checking here + set re_ascii_c0 {[\U0000-\U001F]} + set text [regsub -all $re_ascii_c0 $text ""] + + #short-circuit basic cases + #support tcl pre 2023-11 - see regexp bug below + #if {![regexp {[\uFF-\U10FFFF]} $text]} { + # return [tcl::string::length $text] + #} + if {![regexp "\[\uFF-\U10FFFF\]" $text]} { + return [tcl::string::length $text] + } + + #slow when ascii mixed with unicode (but why?) + return [punk::char::wcswidth $text] + } + #This shouldn't be called on text containing ansi codes! + proc strip_nonprinting_ascii {str} { + #review - some single-byte 'control' chars have visual representations e.g ETX as heart depending on font/codepage + #It is currently used for screen display width calculations + #equivalent for various unicode combining chars etc? + set map [list\ + \x00 ""\ + \x07 ""\ + \x7f ""\ + ] + return [tcl::string::map $map $str] + } + + + + #split into plaintext and runs of combiners (combining diacritical marks - not ZWJ or ZWJNJ) + proc combiner_split {text} { + #split into odd numbered list (or zero) in a similar way to punk::ansi::ta::_perlish_split + # + #set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} + set graphemes [list] + if {[tcl::string::length $text] == 0} { + return {} + } + set list [list] + set start 0 + set strlen [tcl::string::length $text] + #make sure our regexes aren't non-greedy - or we may not have exit condition for loop + #review + while {$start < $strlen && [regexp -start $start -indices -- {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} $text match]} { + lassign $match matchStart matchEnd + #puts "->start $start ->match $matchStart $matchEnd" + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart $matchEnd] + set start [expr {$matchEnd+1}] + + #if {$start >= [tcl::string::length $text]} { + # break + #} + } + lappend list [tcl::string::range $text $start end] + } + + #ZWJ ZWNJ ? + + #1st shot - basic diacritics + #todo - become aware of unicode grapheme cluster boundaries + #This is difficult in Tcl without unicode property based Character Classes in the regex engine + #review - this needs to be performant - it is used a lot by punk terminal/ansi features + #todo - trie data structures for unicode? + #for now we can at least combine diacritics + #should also handle the ZWJ (and the variation selectors? eg \uFE0F) character which should account for emoji clusters + #Note - emoji cluster could be for example 11 code points/41 bytes (family emoji with skin tone modifiers for each member, 3 ZWJs) + #This still leaves a whole class of clusters.. korean etc unhandled :/ + proc grapheme_split {text} { + set graphemes [list] + set csplits [combiner_split $text] + foreach {pt combiners} [lrange $csplits 0 end-1] { + set clist [split $pt ""] + lappend graphemes {*}[lrange $clist 0 end-1] + lappend graphemes [tcl::string::cat [lindex $clist end] $combiners] + } + #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme + if {[lindex $csplits end] ne ""} { + lappend graphemes {*}[split [lindex $csplits end] ""] + } + return $graphemes + } + proc grapheme_split_dec {text} { + set graphemes [list] + set csplits [combiner_split $text] + foreach {pt combiners} [lrange $csplits 0 end-1] { + set pt_decs [scan $pt [tcl::string::repeat %c [tcl::string::length $pt]]] + set combiner_decs [scan $combiners [tcl::string::repeat %c [tcl::string::length $combiners]]] + lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs] + lappend graphemes {*}$pt_decs + } + #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme + if {[lindex $csplits end] ne ""} { + lappend graphemes {*}[scan [lindex $csplits end] [tcl::string::repeat %c [tcl::string::length [lindex $csplits end]]]] + } + return $graphemes + } + proc grapheme_split_dec2 {text} { + set graphemes [list] + set csplits [combiner_split $text] + foreach {pt combiners} $csplits { + set pt_decs [scan $pt [tcl::string::repeat %c [tcl::string::length $pt]]] + if {$combiners ne ""} { + set combiner_decs [scan $combiners [tcl::string::repeat %c [tcl::string::length $combiners]]] + lset pt_decs end [concat [lindex $pt_decs end] $combiner_decs] + } + lappend graphemes {*}$pt_decs + } + return $graphemes + } + proc grapheme_split2 {text} { + set graphemes [list] + set csplits [combiner_split $text] + foreach {pt combiners} [lrange $csplits 0 end-1] { + set clist [split $pt ""] + lappend graphemes {*}[lrange $clist 0 end-1] [tcl::string::cat [lindex $clist end] $combiners] + } + #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme + if {[lindex $csplits end] ne ""} { + lappend graphemes {*}[split [lindex $csplits end] ""] + } + return $graphemes + } + + # -- --- --- --- --- + #will accept a single char or a string - test using console cursor position reporting + #unreliable + proc char_info_testwidth {ch {emit 0}} { + package require punk::console + #uses cursor movement and relies on console to report position.. which it may do properly for single chars - but may misreport for combinations that combine into a single glyph + tailcall punk::console::test_char_width $ch $emit + } + proc char_info_testwidth_cached {char} { + variable charinfo + set dec [scan $char %c] + set twidth "" + if {[tcl::dict::exists $charinfo $dec testwidth]} { + set twidth [tcl::dict::get $charinfo $dec testwidth] + } + if {$twidth eq ""} { + set width [char_info_testwidth $char] + tcl::dict::set charinfo $dec testwidth $width + return $width + } else { + return $twidth + } + } + proc char_info_is_testwidth_cached {char} { + variable charinfo + return [tcl::dict::exists $charinfo [scan $char %c] testwidth] + } + # -- --- --- --- --- + + +} + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::char [tcl::namespace::eval punk::char { + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm new file mode 100644 index 00000000..3c64c7e3 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm @@ -0,0 +1,1828 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::console 0.1.1 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::ansi + + +#if {"windows" eq $::tcl_platform(platform)} { +# #package require zzzload +# #zzzload::pkg_require twapi +#} + +#see https://learn.microsoft.com/en-us/windows/console/classic-vs-vt +#https://learn.microsoft.com/en-us/windows/console/creating-a-pseudoconsole-session + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::console { + variable tabwidth 8 ;#default only - will attempt to detect and set to that configured in terminal + #Note that windows terminal cooked mode seems to use 8 for interactive use even if set differently + #e.g typing tab characters may still be echoed 8-spaced while writing to stdout my obey the terminal's tab stops. + variable has_twapi 0 + variable previous_stty_state_stdin "" + variable previous_stty_state_stdout "" + variable previous_stty_state_stderr "" + variable is_raw 0 + variable input_chunks_waiting + if {![info exists input_chunks_waiting(stdin)]} { + set input_chunks_waiting(stdin) [list] + } + variable ansi_response_chunk ;#array keyed on callid + variable ansi_response_wait ;#array keyed on callid + variable ansi_response_queue ;#list of callids + variable ansi_response_queuedata ;#dict keyed on callid - with function params + + # -- + variable ansi_available -1 ;#default -1 for unknown. Leave it this way so test for ansi support is run. + #-1 still evaluates to true - as the modern assumption for ansi availability is true + #only false if ansi_available has been set 0 by test_can_ansi + #support ansistrip for legacy windows terminals + # -- + + variable ansi_wanted 2 ;#2 for default assumed yes, will be set to -1 for automatically unwanted when ansi unavailable values of 0 or 1 won't be autoset + + #punk::console namespace - contains *directly* acting functions - some based on ansi escapes from the 'ansi' sub namespace, some on local system calls or executable calls wrapped in the 'local' sub namespace + #directly acting means they write to stdout to cause the console to peform the action, or they perform the action immediately via other means. + #punk::console::ansi contains a subset of punk::ansi, but with emission to stdout as opposed to simply returning the ansi sequence. + #punk::console::local functions are used by punk::console commands when there is no ansi equivalent + #ansi escape sequences are possibly preferable esp if terminal is remote to process running punk::console + # punk::local commands may be more performant in some circumstances where console is directly attached, but it shouldn't be assumed. e.g ansi::titleset outperforms local::titleset on windows with twapi. + + namespace eval ansi { + #ansi escape sequence based terminal/console control functions + namespace export * + } + namespace eval local { + #non-ansi terminal/console control functions + #e.g external utils system API's. + namespace export * + } + + if {"windows" eq $::tcl_platform(platform)} { + #accept args for all dummy/load functions so we don't have to match/update argument signatures here + + proc enableAnsi {args} { + #loopavoidancetoken (don't remove) + internal::define_windows_procs + internal::abort_if_loop + tailcall enableAnsi {*}$args + } + #review what raw mode means with regard to a specific channel vs terminal as a whole + proc enableRaw {args} { + #loopavoidancetoken (don't remove) + internal::define_windows_procs + internal::abort_if_loop + tailcall enableRaw {*}$args + } + proc disableRaw {args} { + #loopavoidancetoken (don't remove) + internal::define_windows_procs + internal::abort_if_loop + tailcall disableRaw {*}$args + } + proc enableVirtualTerminal {args} { + #loopavoidancetoken (don't remove) + internal::define_windows_procs + internal::abort_if_loop + tailcall enableVirtualTerminal {*}$args + } + proc disableVirtualTerminal {args} { + #loopavoidancetoken (don't remove) + internal::define_windows_procs + internal::abort_if_loop + tailcall disableVirtualTerminal {*}$args + } + set funcs [list disableAnsi enableProcessedInput disableProcessedInput] + foreach f $funcs { + proc $f {args} [string map [list %f% $f] { + set mybody [info body %f%] + internal::define_windows_procs + set newbody [info body %f%] + if {$newbody ne $mybody} { + tailcall %f% {*}$args + } else { + #error vs noop? + puts stderr "Unable to set implementation for %f% - check twapi?" + } + }] + } + + } else { + proc enableAnsi {} { + #todo? + } + proc disableAnsi {} { + + } + + #todo - something better - the 'channel' concept may not really apply on unix, as raw mode is for input and output modes + proc enableRaw {{channel stdin}} { + variable is_raw + variable previous_stty_state_$channel + set sttycmd [auto_execok stty] + if {[set previous_stty_state_$channel] eq ""} { + set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + } + + exec {*}$sttycmd raw -echo <@$channel + set is_raw 1 + return [dict create previous [set previous_stty_state_$channel]] + } + proc disableRaw {{channel stdin}} { + variable is_raw + variable previous_stty_state_$channel + set sttycmd [auto_execok stty] + if {[set previous_stty_state_$channel] ne ""} { + exec {*}$sttycmd [set previous_stty_state_$channel] + set previous_stty_state_$channel "" + set is_raw 0 + return restored + } + exec {*}$sttycmd -raw echo <@$channel + set is_raw 0 + return done + } + proc enableVirtualTerminal {{channels {input output}}} { + + } + proc disableVirtualTerminal {args} { + + } + } + + #review - document and decide granularity required. should we enable/disable more than one at once? + proc enable_mouse {} { + puts -nonewline stdout \x1b\[?1000h + puts -nonewline stdout \x1b\[?1003h + puts -nonewline stdout \x1b\[?1015h + puts -nonewline stdout \x1b\[?1006h + flush stdout + } + proc disable_mouse {} { + puts -nonewline stdout \x1b\[?1000l + puts -nonewline stdout \x1b\[?1003l + puts -nonewline stdout \x1b\[?1015l + puts -nonewline stdout \x1b\[?1006l + flush stdout + } + proc enable_bracketed_paste {} { + puts -nonewline stdout \x1b\[?2004h + } + proc disable_bracketed_paste {} { + puts -nonewline stdout \x1b\[?2004l + } + proc start_application_mode {} { + #need loop to read events? + puts -nonewline stdout \x1b\[?1049h ;#alt screen + enable_mouse + #puts -nonewline stdout \x1b\[?25l ;#hide cursor + puts -nonewline stdout \x1b\[?1003h\n + enable_bracketed_paste + + } + proc mode {{raw_or_line query}} { + variable is_raw + variable ansi_available + set raw_or_line [string tolower $raw_or_line] + if {$raw_or_line eq "query"} { + if {$is_raw} { + return "raw" + } else { + return "line" + } + } elseif {$raw_or_line eq "raw"} { + punk::console::enableRaw + if {[can_ansi]} { + punk::console::enableVirtualTerminal both + } + } elseif {$raw_or_line eq "line"} { + #review -order. disableRaw has memory from enableRaw.. but but for line mode we want vt disabled - so call it after disableRaw (?) + punk::console::disableRaw + if {[can_ansi]} { + punk::console::disableVirtualTerminal input ;#default readline arrow behaviour etc + punk::console::enableVirtualTerminal output ;#display/use ansi codes + } + } else { + error "punk::console::mode expected 'raw' or 'line' or default value 'query'" + } + } + + namespace eval internal { + proc abort_if_loop {{failmsg ""}} { + #puts "il1 [info level 1]" + #puts "thisproc: [lindex [info level 0] 0]" + set would_loop [uplevel 1 {expr {[string match *loopavoidancetoken* [info body [namespace tail [lindex [info level 0] 0]]]]}}] + #puts "would_loop: $would_loop" + if {$would_loop} { + set procname [uplevel 1 {namespace tail [lindex [info level 0] 0]}] + if {$failmsg eq ""} { + set errmsg "[namespace current] Failed to redefine procedure $procname" + } else { + set errmsg $failmsg + } + error $errmsg + } + } + proc define_windows_procs {} { + package require zzzload + set loadstate [zzzload::pkg_require twapi] + + #loadstate could also be stuck on loading? - review - zzzload not very ripe + #Twapi is relatively slow to load - can be 1s plus in normal cases - and much longer if there are disk performance issues. + + if {$loadstate ni [list failed]} { + #review zzzload usage + #puts stdout "=========== console loading twapi =============" + zzzload::pkg_wait twapi + package require twapi ;#should be fast once twapi dll loaded in zzzload thread + set ::punk::console::has_twapi 1 + + #todo - move some of these to the punk::console::local sub-namespace - as they use APIs rather than in-band ANSI to do their work. + #enableAnsi seems like it should be directly under punk::console .. but then it seems inconsistent if other local console-mode setting functions aren't. + #Find a compromise to organise things somewhat sensibly.. + + #this is really enableAnsi *processing* + proc [namespace parent]::enableAnsi {} { + #output handle modes + #Enable virtual terminal processing (sometimes off in older windows terminals) + #ENABLE_PROCESSED_OUTPUT = 0x0001 + #ENABLE_WRAP_AT_EOL_OUTPUT = 0x0002 + #ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + #DISABLE_NEWLINE_AUTO_RETURN = 0x0008 + set h_out [twapi::get_console_handle stdout] + set oldmode_out [twapi::GetConsoleMode $h_out] + set newmode_out [expr {$oldmode_out | 4}] ;#don't enable processed output too, even though it's required. keep symmetrical with disableAnsi? + + twapi::SetConsoleMode $h_out $newmode_out + + #what does window_input have to do with it?? + #input handle modes + #ENABLE_PROCESSED_INPUT 0x0001 ;#set to zero will allow ctrl-c to be reported as keyboard input rather than as a signal + #ENABLE_LINE_INPUT 0x0002 + #ENABLE_ECHO_INPUT 0x0004 + #ENABLE_WINDOW_INPUT 0x0008 (default off when a terminal created) + #ENABLE_MOUSE_INPUT 0x0010 + #ENABLE_INSERT_MODE 0X0020 + #ENABLE_QUICK_EDIT_MODE 0x0040 + #ENABLE_VIRTUAL_TERMINAL_INPUT 0x0200 (default off when a terminal created) (512) + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in | 8}] + #set newmode_in [expr {$oldmode_in | 0x208}] + + twapi::SetConsoleMode $h_in $newmode_in + + return [list stdout [list from $oldmode_out to $newmode_out] stdin [list from $oldmode_in to $newmode_in]] + } + proc [namespace parent]::disableAnsi {} { + set h_out [twapi::get_console_handle stdout] + set oldmode_out [twapi::GetConsoleMode $h_out] + set newmode_out [expr {$oldmode_out & ~4}] + twapi::SetConsoleMode $h_out $newmode_out + + #??? review + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in & ~8}] + twapi::SetConsoleMode $h_in $newmode_in + + + return [list stdout [list from $oldmode_out to $newmode_out] stdin [list from $oldmode_in to $newmode_in]] + } + + # + proc [namespace parent]::enableVirtualTerminal {{channels {input output}}} { + set ins [list in input stdin] + set outs [list out output stdout stderr] + set known [concat $ins $outs both] + set directions [list] + foreach v $channels { + if {$v in $ins} { + lappend directions input + } elseif {$v in $outs} { + lappend directions output + } elseif {$v eq "both"} { + lappend directions input output + } + if {$v ni $known} { + error "enableVirtualTerminal expected channel values to be one of '$known'. (all values mapped to input and/or output)" + } + } + set channels $directions ;#don't worry about dups. + if {"both" in $channels} { + lappend channels input output + } + set result [dict create] + if {"output" in $channels} { + #note setting stdout makes stderr have the same settings - ie there is really only one output to configure + set h_out [twapi::get_console_handle stdout] + set oldmode [twapi::GetConsoleMode $h_out] + set newmode [expr {$oldmode | 4}] + twapi::SetConsoleMode $h_out $newmode + dict set result output [list from $oldmode to $newmode] + } + + if {"input" in $channels} { + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in | 0x200}] + twapi::SetConsoleMode $h_in $newmode_in + dict set result input [list from $oldmode_in to $newmode_in] + } + + return $result + } + proc [namespace parent]::disableVirtualTerminal {{channels {input output}}} { + set ins [list in input stdin] + set outs [list out output stdout stderr] + set known [concat $ins $outs both] + set directions [list] + foreach v $channels { + if {$v in $ins} { + lappend directions input + } elseif {$v in $outs} { + lappend directions output + } elseif {$v eq "both"} { + lappend directions input output + } + if {$v ni $known} { + error "disableVirtualTerminal expected channel values to be one of '$known'. (all values mapped to input and/or output)" + } + } + set channels $directions ;#don't worry about dups. + if {"both" in $channels} { + lappend channels input output + } + set result [dict create] + if {"output" in $channels} { + #as above - configuring stdout does stderr too + set h_out [twapi::get_console_handle stdout] + set oldmode [twapi::GetConsoleMode $h_out] + set newmode [expr {$oldmode & ~4}] + twapi::SetConsoleMode $h_out $newmode + dict set result output [list from $oldmode to $newmode] + } + if {"input" in $channels} { + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in & ~0x200}] + twapi::SetConsoleMode $h_in $newmode_in + dict set result input [list from $oldmode_in to $newmode_in] + } + + #return [list stdout [list from $oldmode_out to $newmode_out] stdin [list from $oldmode_in to $newmode_in]] + return $result + } + + proc [namespace parent]::enableProcessedInput {} { + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in | 1}] + twapi::SetConsoleMode $h_in $newmode_in + return [list stdin [list from $oldmode_in to $newmode_in]] + } + proc [namespace parent]::disableProcessedInput {} { + set h_in [twapi::get_console_handle stdin] + set oldmode_in [twapi::GetConsoleMode $h_in] + set newmode_in [expr {$oldmode_in & ~1}] + twapi::SetConsoleMode $h_in $newmode_in + return [list stdin [list from $oldmode_in to $newmode_in]] + } + + + + } else { + + puts stderr "punk::console falling back to stty because twapi load failed" + proc [namespace parent]::enableAnsi {} { + puts stderr "punk::console::enableAnsi todo" + } + proc [namespace parent]::disableAnsi {} { + } + #? + proc [namespace parent]::enableVirtualTerminal {{channels {input output}}} { + } + proc [namespace parent]::disableVirtualTerminal {{channels {input output}}} { + } + proc [namespace parent]::enableProcessedInput {args} { + + } + proc [namespace parent]::disableProcessedInput {args} { + + } + + } + + proc [namespace parent]::enableRaw {{channel stdin}} { + variable is_raw + variable previous_stty_state_$channel + + if {[package provide twapi] ne ""} { + set console_handle [twapi::get_console_handle stdin] + #returns dictionary + #e.g -processedinput 1 -lineinput 1 -echoinput 1 -windowinput 0 -mouseinput 0 -insertmode 1 -quickeditmode 1 -extendedmode 1 -autoposition 0 + set oldmode [twapi::get_console_input_mode] + twapi::modify_console_input_mode $console_handle -lineinput 0 -echoinput 0 + # Turn off the echo and line-editing bits + #set newmode [dict merge $oldmode [dict create -lineinput 0 -echoinput 0]] + set newmode [twapi::get_console_input_mode] + + set is_raw 1 + #don't disable handler - it will detect is_raw + ### twapi::set_console_control_handler {} + return [list stdin [list from $oldmode to $newmode]] + } elseif {[set sttycmd [auto_execok stty]] ne ""} { + if {[set previous_stty_state_$channel] eq ""} { + set previous_stty_state_$channel [exec {*}$sttycmd -g <@$channel] + } + + exec {*}$sttycmd raw -echo <@$channel + set is_raw 1 + #review - inconsistent return dict + return [dict create stdin [list from [set previous_stty_state_$channel] to "" note "fixme - to state not shown"]] + } else { + error "punk::console::enableRaw Unable to use twapi or stty to set raw mode - aborting" + } + } + + #note: twapi GetStdHandle & GetConsoleMode & SetConsoleCombo unreliable - fails with invalid handle (somewhat intermittent.. after stdin reopened?) + #could be we were missing a step in reopening stdin and console configuration? + + proc [namespace parent]::disableRaw {{channel stdin}} { + variable is_raw + variable previous_stty_state_$channel + + if {[package provide twapi] ne ""} { + set console_handle [twapi::get_console_handle stdin] + set oldmode [twapi::get_console_input_mode] + # Turn on the echo and line-editing bits + twapi::modify_console_input_mode $console_handle -lineinput 1 -echoinput 1 + set newmode [twapi::get_console_input_mode] + set is_raw 0 + return [list stdin [list from $oldmode to $newmode]] + } elseif {[set sttycmd [auto_execok stty]] ne ""} { + set sttycmd [auto_execok stty] + if {[set previous_stty_state_$channel] ne ""} { + exec {*}$sttycmd [set previous_stty_state_$channel] + set previous_stty_state_$channel "" + return restored + } + exec {*}$sttycmd -raw echo <@$channel + set is_raw 0 + #do we really want to exec stty yet again to show final 'to' state? + #probably not. We should work out how to read the stty result flags and set a result.. or just limit from,to to showing echo and lineedit states. + return [list stdin [list from "[set previous_stty_state_$channel]" to "" note "fixme - to state not shown"]] + } else { + error "punk::console::disableRaw Unable to use twapi or stty to unset raw mode - aborting" + } + } + + + } + + #capturingendregex should capture ANY prefix, whole escape match - and a subcapture of the data we're interested in and match at end of string. + #ie {(.*)(ESC(info)end)$} + #e.g {(.*)(\x1bP44!~([:alnum:])\x1b\\)$} + #we expect 4 results from regexp -indices -inline (overallmatch, prefix, wholeescape,info) + #todo - check capturingendregex value supplied has appropriate captures and tail-anchor + proc get_ansi_response_payload {query capturingendregex {inoutchannels {stdin stdout}}} { + lassign $inoutchannels input output + + #chunks from input that need to be handled by readers + upvar ::punk::console::input_chunks_waiting input_chunks_waiting + + #we need to cooperate with other stdin/$input readers and put data here if we overconsume. + #Main repl reader may be currently active - or may be inactive. + #This call could come from within code called by the main reader - or from user code running while main read-loop is temporarily disabled + #In other contexts there may not even be another input reader + + #REVIEW - what if there is existing data in input_chunks_waiting - is it for us? + #temp - let's keep alert to it until we decide if it's legit/required.. + if {[info exists input_chunks_waiting($input)] && [llength $input_chunks_waiting($input)]} { + #puts stderr "[punk::ansi::a+ cyan bold]get_ansi_response_payload called while input_chunks_waiting($input) contained data: $input_chunks_waiting($input)[punk::ansi::a]" + } + if {!$::punk::console::ansi_available} { + return "" + } + set callid [info cmdcount] ;#info cmdcount is almost as fast as clock clicks - and whilst not unique in a long-running app(will wrap?) - fine for this context + # + + upvar ::punk::console::ansi_response_chunk accumulator + upvar ::punk::console::ansi_response_wait waitvar + upvar ::punk::console::ansi_response_queue queue + upvar ::punk::console::ansi_response_queuedata queuedata + upvar ::punk::console::ansi_response_clock clock + upvar ::punk::console::ansi_response_timeoutid timeoutid + set accumulator($callid) "" + set waitvar($callid) "" + lappend queue $callid + + + + #todo - use a linked array and an accumulatorid and waitvar id? When can there be more than one terminal query in-flight? + + set existing_handler [fileevent $input readable] ;#review! + set this_handler ::punk::console::internal::ansi_response_handler_regex + if {[lindex $existing_handler 0] eq $this_handler} { + puts stderr "[punk::ansi::a+ red]Warning for callid $callid get_ansi_response_payload called while existing ansi response handler in place[a]: $this_handler" + puts stderr "queue state: $queue" + flush stderr + + if {[lindex $queue 0] ne $callid} { + } + error "get_ansi_response_payload - re-entrancy unrecoverable" + } + + fileevent $input readable {} + + set input_state [fconfigure $input] + #todo - make timeout configurable? + set waitvarname "::punk::console::ansi_response_wait($callid)" + #500ms is generally plenty for a terminal to respond.. but not in some cases. e.g event loop busy with stdin keypress?? review + set timeoutid($callid) [after 2000 [list set $waitvarname timedout]] + + #JMN + # - stderr vs stdout + #It has to be same channel as used by functions such as test_char_width or erroneous results returned for those functions + #(presumably race conditions as to when data hits console?) + #review - experiment changing this and calling functions to stderr and see if it works + #review - Are there disadvantages to using stdout vs stderr? + + #puts stdout "sending console request [ansistring VIEW $query]" + puts -nonewline $output $query;flush $output + + #todo - test and save rawstate so we don't disableRaw if console was already raw + if {!$::punk::console::is_raw} { + set was_raw 0 + punk::console::enableRaw + } else { + set was_raw 1 + } + fconfigure $input -blocking 0 + # + #in handler - its used for a boolean match (capturing aspect not used) + set clock($callid) [clock millis] ;#time of launch - may be delay before first event depending on what's going on + + #first shot without using filevent, call the stdin reader directly - maybe it's there already + #This can be significantly faster than setting up a fileevent (2024 e.g 1.5ms vs 65ms) + $this_handler $input $callid $capturingendregex + if {$waitvar($callid) ne "ok"} { + fileevent $input readable [list $this_handler $input $callid $capturingendregex] + } + + #JMN + + #response from terminal + #e.g for cursor position \033\[46;1R + + + if {[set waitvar($callid)] eq ""} { + vwait ::punk::console::ansi_response_wait($callid) + #puts stderr ">>>> end vwait1 $waitvar($callid)<<<<" + while {[string match extend-* $waitvar($callid)]} { + set extension [lindex [split $waitvar($callid) -] 1] + #puts stderr "get_ansi_response_payload Extending timeout by $extension" + #after cancel $timeoutid($callid) + set timeoutid($callid) [after $extension [list set $waitvarname timedout]] + vwait ::punk::console::ansi_response_wait($callid) + } + } + #response handler automatically removes it's own fileevent + fileevent $input readable {} ;#explicit remove anyway - review + + if {$waitvar($callid) ne "timedout"} { + after cancel $timeoutid($callid) + } else { + puts stderr "timeout in get_ansi_response_payload. callid $callid Ansi request was:[ansistring VIEW -lf 1 -vt 1 $query]" + } + + if {$was_raw == 0} { + punk::console::disableRaw + } + #restore $input state + fconfigure $input -blocking [dict get $input_state -blocking] + + + + set response [set accumulator($callid)] + + if {$response ne ""} { + set got_match [regexp -indices $capturingendregex $response _match_indices prefix_indices response_indices payload_indices] + if {$got_match} { + set responsedata [string range $response {*}$response_indices] + set payload [string range $response {*}$payload_indices] + set prefixdata [string range $response {*}$prefix_indices] + if {$prefixdata ne ""} { + #puts stderr "Warning - get_ansi_response_payload read extra data at start - '[ansistring VIEW -lf 1 $prefixdata]' (response=[ansistring VIEW -lf 1 $responsedata])" + lappend input_chunks_waiting($input) $prefixdata + } + } else { + #timedout - or eof? + puts stderr "get_ansi_response_payload callid:$callid regex match '$capturingendregex' to data '[ansistring VIEW -lf 1 -vt 1 $response]' not found" + lappend input_chunks_waiting($input) $response + set payload "" + } + } else { + #timedout or eof? and nothing read + set payload "" + } + + #is there a way to know if existing_handler is input_chunks_waiting aware? + if {[string length $existing_handler] && [lindex $existing_handler 0] ne $this_handler} { + #puts "get_ansi_response_payload reinstalling ------>$existing_handler<------" + fileevent $input readable $existing_handler + #we may have consumed all pending input on $input - so there may be no trigger for the readable fileevent + if {[llength $input_chunks_waiting($input)]} { + #This is experimental If a handler is aware of input_chunks_waiting - there should be no need to schedule a trigger + #If it isn't, but the handler can accept an existing chunk of data as a 'waiting' argument - we could trigger and pass it the waiting chunks - but there's no way to know its API. + #we could look at info args - but that's not likely to tell us much in a robust way. + #we could create a reflected channel for stdin? That is potentially an overreach..? + #triggering it manually... as it was already listening - this should generally do no harm as it was the active reader anyway, but won't help with the missing data if it's input_chunks_waiting-unaware. + set handler_args [info args [lindex $existing_handler 0]] + if {[lindex $handler_args end] eq "waiting"} { + #Looks like the existing handler is setup for punk repl cooperation. + + puts stdout "\n\n[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload callid $callid triggering existing handler\n $existing_handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel[punk::ansi::a]" + puts stdout "[punk::ansi::a+ yellow bold]-->waiting: [ansistring VIEW -lf 1 -vt 1 $input_chunks_waiting($input)][punk::ansi::a]" + flush stdout + + #concat and supply to existing handler in single text block - review + #Note will only + set waitingdata [join $input_chunks_waiting($input) ""] + set input_chunks_waiting($input) [list] + #after idle [list after 0 [list {*}$existing_handler $waitingdata]] + after idle [list {*}$existing_handler $waitingdata] ;#after 0 may be put ahead of events it shouldn't be - review + unset waitingdata + } else { + #! todo? for now, emit a clue as to what's happening. + puts stderr "[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload cannot trigger existing handler $existing_handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]" + if {[package provide punk::repl::codethread] ne "" && [punk::repl::codethread::is_running]} { + if {[eof $input]} { + puts stdout "restarting repl" + repl::reopen_stdin + } + } + } + } + #Note - we still may be in_repl_handler here (which disables its own reader while executing commandlines) + #The input_chunks_waiting may really belong to the existing_handler we found - but if it doesn't consume them they will end up being read by the repl_handler when it eventually re-enables. + #todo - some better structure than just a list of waiting chunks indexed by input channel, so repl/other handlers can determine the context in which these waiting chunks were generated? + } elseif {[package provide punk::repl::codethread] ne "" && [punk::repl::codethread::is_running]} { + if {[llength $input_chunks_waiting($input)]} { + #don't trigger the repl handler manually - we will inevitably get things out of order - as it knows when to enable/disable itself based on whether chunks are waiting. + #triggering it by putting it on the eventloop will potentially result in re-entrancy + #The cooperating reader must be designed to consume waiting chunks and only reschedule it's channel read handler once all waiting chunks have been consumed. + #puts stderr "[punk::ansi::a+ green bold]--> repl_handler has chunks to consume [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]" + } + if {[eof $input]} { + #test + puts stdout "restarting repl" + repl::reopen stdin + } + } + + catch { + unset accumulator($callid) + unset waitvar($callid) + dict unset queuedata $callid + } + if {[llength $queue] > 1} { + set next_callid [lindex $queue 1] + set waitvar($callid) go_ahead + } + lpop queue 0 + + + #set punk::console::chunk "" + return $payload + } + + + #review - reading 1 byte at a time and repeatedly running the small capturing/completion regex seems a little inefficient... but we don't have a way to peek or put back chars (?) + #review (we do have the punk::console::input_chunks_waiting($chan) array to cooperatively put back data - but this won't work for user scripts not aware of this) + #review - timeout - what if terminal doesn't put data on stdin? error vs stderr report vs empty results + #review - Main loop may need to detect some terminal responses and store them for lookup instead-of or as-well-as this handler? + #e.g what happens to mouse-events while user code is executing? + #we may still need this handler if such a loop doesn't exist. + proc ansi_response_handler_regex {chan callid endregex} { + upvar ::punk::console::ansi_response_chunk chunks + upvar ::punk::console::ansi_response_wait waits + upvar ::punk::console::ansi_response_clock clock ;#initial time in millis was set when fileevent was created + + #endregex should explicitly have a trailing $ + set status [catch {read $chan 1} bytes] + if { $status != 0 } { + # Error on the channel + fileevent $chan readable {} + puts "ansi_response_handler_regex error reading $chan: $bytes" + set waits($callid) [list error_read status $status bytes $bytes] + } elseif {$bytes ne ""} { + # Successfully read the channel + #puts "got: [string length $bytes]bytes" + append chunks($callid) $bytes + #puts stderr [ansistring VIEW $chunks($callid)] + if {[regexp $endregex $chunks($callid)]} { + fileevent $chan readable {} + #puts stderr "matched - setting ansi_response_wait($callid) ok" + set waits($callid) ok + } else { + if {[string length $chunks($callid)] % 10 == 0 || $clock($callid) - [clock millis] > 50} { + after cancel $::punk::console::ansi_response_timeoutid($callid) + set waits($callid) extend-1000 + } + } + } elseif {[catch {eof $chan}] || [eof $chan]} { + catch {fileevent $chan readable {}} + # End of file on the channel + #review + puts stderr "ansi_response_handler_regex end of file on channel $chan" + set waits($callid) eof + } elseif {![catch {fblocked $chan}] && [fblocked $chan]} { + # Read blocked. Just return + # Caller should be using timeout on the wait variable + } else { + fileevent $chan readable {} + # Something else + puts stderr "ansi_response_handler_regex Situation shouldn't be possible. No error and no bytes read on channel $chan but chan is not fblocked or EOF" + set waits($callid) error_unknown_zerobytes_while_not_blocked_or_eof + } + } + } ;#end namespace eval internal + + variable colour_disabled 0 + #todo - move to punk::config + # https://no-color.org + if {[info exists ::env(NO_COLOR)]} { + if {$::env(NO_COLOR) ne ""} { + set colour_disabled 1 + } + } + + #a and a+ functions are not very useful when emitting directly to console + #e.g puts [punk::console::a red]test[punk::console::a cyan] would produce a cyan coloured test as the commands are evaluated first + + proc a? {args} { + #stdout + variable ansi_wanted + if {$ansi_wanted <= 0} { + puts -nonewline [punk::ansi::ansistripraw [::punk::ansi::a?]] + } else { + tailcall ansi::a? {*}$args + } + } + + proc code_a+ {args} { + variable ansi_wanted + if {$ansi_wanted <= 0} { + return + } + #a and a+ are called a *lot* - avoid even slight overhead of tailcall as it doesn't give us anything useful here + #tailcall punk::ansi::a+ {*}$args + ::punk::ansi::a+ {*}$args + } + proc code_a {args} { + variable ansi_wanted + if {$ansi_wanted <= 0} { + return + } + #tailcall punk::ansi::a {*}$args + ::punk::ansi::a {*}$args + } + proc code_a? {args} { + variable ansi_wanted + if {$ansi_wanted <= 0} { + return [punk::ansi::ansistripraw [::punk::ansi::a? {*}$args]] + } else { + tailcall ::punk::ansi::a? {*}$args + } + } + + #REVIEW! this needs reworking. + #It needs to be clarified as to what ansi off is supposed to do. + #Turning ansi off only stops new ansi being generated - but what about codes stored in configurations of existing elements such as tables/frames? + #It will stop underlines/bold/reverse as well as SGR colours + #what about ansi movement codes etc? + proc ansi {{onoff {}}} { + variable ansi_wanted + if {[string length $onoff]} { + set onoff [string tolower $onoff] + switch -- $onoff { + 1 - + on - + true - + yes { + set ansi_wanted 1 + } + 0 - + off - + false - + no { + set ansi_wanted 0 + punk::ansi::sgr_cache -action clear + } + default { + set ansi_wanted 2 + } + default { + error "punk::console::ansi expected 0|1|on|off|true|false|yes|no|default" + } + } + } + catch {punk::repl::reset_prompt} + return [expr {$ansi_wanted}] + } + + #colour + # Turning colour off will stop SGR colour codes from being generated unless 'forcecolour' is added to the argument list for the punk::ans::a functions + proc colour {{on {}}} { + variable colour_disabled + if {$on ne ""} { + if {![string is boolean -strict $on]} { + error "punk::console::colour expected a boolean e.g 0|1|on|off|true|false|yes|no" + } + #an experiment with complete disabling vs test of state for each call + if {$on} { + if {$colour_disabled} { + #change of state + punk::ansi::sgr_cache -action clear + catch {punk::repl::reset_prompt} + set colour_disabled 0 + } + } else { + #we don't disable a/a+ entirely - they must still emit underlines/bold/reverse + if {!$colour_disabled} { + #change of state + punk::ansi::sgr_cache -action clear + catch {punk::repl::reset_prompt} + set colour_disabled 1 + } + } + } + return [expr {!$colour_disabled}] + } + + + namespace eval ansi { + proc a {args} { + puts -nonewline [::punk::ansi::a {*}$args] + } + proc a? {args} { + puts -nonewline stdout [::punk::ansi::a? {*}$args] + } + proc a+ {args} { + puts -nonewline [::punk::ansi::a+ {*}$args] + } + proc clear {} { + puts -nonewline stdout [punk::ansi::clear] + } + proc clear_above {} { + puts -nonewline stdout [punk::ansi::clear_above] + } + proc clear_below {} { + puts -nonewline stdout [punk::ansi::clear_below] + } + proc clear_all {} { + puts -nonewline stdout [punk::ansi::clear_all] + } + proc reset {} { + puts -nonewline stdout [punk::ansi::reset] + } + } + namespace import ansi::clear + namespace import ansi::clear_above + namespace import ansi::clear_below + namespace import ansi::clear_all + namespace import ansi::reset + + namespace eval local { + proc set_codepage_output {cpname} { + #todo + if {"windows" eq $::tcl_platform(platform)} { + twapi::set_console_output_codepage $cpname + } else { + error "set_codepage_output unimplemented on $::tcl_platform(platform)" + } + } + proc set_codepage_input {cpname} { + #todo + if {"windows" eq $::tcl_platform(platform)} { + twapi::set_console_input_codepage $cpname + } else { + error "set_codepage_input unimplemented on $::tcl_platform(platform)" + } + } + } + namespace import local::set_codepage_output + namespace import local::set_codepage_input + + # -- --- --- --- --- --- --- + #get_ansi_response functions + #review - can these functions sensibly be used on channels not attached to the local console? + #ie can we default to {stdin stdout} but allow other channel pairs? + # -- --- --- --- --- --- --- + proc get_cursor_pos {{inoutchannels {stdin stdout}}} { + #response from terminal + #e.g \033\[46;1R + set capturingregex {(.*)(\x1b\[([0-9]+;[0-9]+)R)$} ;#must capture prefix,entire-response,response-payload + + set request "\033\[6n" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + proc get_checksum_rect {id page t l b r {inoutchannels {stdin stdout}}} { + #e.g \x1b\[P44!~E797\x1b\\ + #re e.g {(.*)(\x1b\[P44!~([[:alnum:]])\x1b\[\\)$} + set capturingregex [string map [list %id% $id] {(.*)(\x1bP%id%!~([[:alnum:]]+)\x1b\\)$}] + set request "\x1b\[${id}\;${page}\;$t\;$l\;$b\;$r*y" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + proc get_device_status {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[([0-9]+)n)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[5n" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + proc get_tabstops {{inoutchannels {stdin stdout}}} { + #DECTABSR \x1b\[2\$w + #response example " ^[P2$u9/17/25/33/41/49/57/65/73/81^[\ " (where ^[ is \x1b) + #set capturingregex {(.*)(\x1b\[P2$u()\x1b\[\\)} + #set capturingregex {(.*)(\x1bP2$u((?:[0-9]+)*(?:\/[0-9]+)*)\x1b\\)$} + set capturingregex {(.*)(\x1bP2\$u(.*)\x1b\\)$} + set request "\x1b\[2\$w" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + set tabstops [split $payload "/"] + return $tabstops + } + + #a simple estimation of tab-width under assumption console is set with even spacing. + #It's known this isn't always the case - but things like textutil::untabify2 take only a single value + #on some systems test_char_width is a similar speed to get_tabstop_apparent_width - but on some test_char_width is much slower + #we will use test_char_width as a fallback + proc get_tabstop_apparent_width {} { + set tslist [get_tabstops] + if {![llength $tslist]} { + #either terminal failed to report - or none set. + set testw [test_char_width \t] + if {[string is integer -strict $testw]} { + return $testw + } + #We don't support none - default to 8 + return 8 + } + #we generally expect to see a tabstop at column 1 - but it may not be set. + if {[lindex $tslist 0] eq "1"} { + if {[llength $tslist] == 1} { + set testw [test_char_width \t] + if {[string is integer -strict $testw]} { + return $testw + } + return 8 + } else { + set next [lindex $tslist 1] + return [expr {$next - 1}] + } + } else { + #simplistic guess at width - review - do we need to consider leftmost tabstops as more likely to be non-representative and look further into the list? + if {[llength $tslist] == 1} { + return [lindex $tslist 0] + } else { + return [expr {[lindex $tslist 1] - [lindex $tslist 0]}] + } + } + } + #default to 8 just because it seems to be most common default in terminals + proc set_tabstop_width {{w 8}} { + set tsize [get_size] + set width [dict get $tsize columns] + set mod [expr {$width % $w}] + set max [expr {$width - $mod}] + set tstops "" + set c 1 + while {$c <= $max} { + append tstops [string repeat " " $w][punk::ansi::set_tabstop] + incr c $w + } + set punk::console::tabwidth $w ;#we also attempt to read terminal's tabstops and set tabwidth to the apparent spacing of first non-1 value in tabstops list. + catch {textutil::tabify::untabify2 "" $w} ;#textutil tabify can end up uninitialised and raise errors like "can't read Spaces().." after a tabstop change This call seems to keep tabify happy - review. + puts -nonewline "[punk::ansi::clear_all_tabstops]\n[punk::ansi::set_tabstop]$tstops" + } + + + proc get_cursor_pos_list {{inoutchannels {stdin stdout}}} { + return [split [get_cursor_pos $inoutchannels] ";"] + } + + #todo - determine cursor on/off state before the call to restore properly. May only be possible + proc get_size {{inoutchannels {stdin stdout}}} { + lassign $inoutchannels in out + #we can't reliably use [chan names] for stdin,stdout. There could be stacked channels and they may have a names such as file22fb27fe810 + #chan eof is faster whether chan exists or not than + if {[catch {chan eof $in} is_eof]} { + error "punk::console::get_size input channel $in seems to be closed ([info level 1])" + } else { + if {$is_eof} { + error "punk::console::get_size eof on input channel $in ([info level 1])" + } + } + if {[catch {chan eof $out} is_eof]} { + error "punk::console::get_size output channel $out seems to be closed ([info level 1])" + } else { + if {$is_eof} { + error "punk::console::get_size eof on output channel $out ([info level 1])" + } + } + + #keep out of catch - no point in even trying a restore move if we can't get start position - just fail here. + lassign [get_cursor_pos_list $inoutchannels] start_row start_col + + if {[catch { + #some terminals (conemu on windows) scroll the viewport when we make a big move down like this - a move to 1 1 immediately after cursor_save doesn't seem to fix that. + #This issue also occurs when switching back from the alternate screen buffer - so perhaps that needs to be addressed elsewhere. + puts -nonewline $out [punk::ansi::cursor_off][punk::ansi::move 2000 2000] + lassign [get_cursor_pos_list $inoutchannels] lines cols + puts -nonewline $out [punk::ansi::move $start_row $start_col][punk::console::cursor_on];flush stdout + set result [list columns $cols rows $lines] + } errM]} { + puts -nonewline $out [punk::ansi::move $start_row $start_col] + puts -nonewline $out [punk::ansi::cursor_on] + error "$errM" + } else { + return $result + } + } + + #faster - but uses cursor_save - which we may want to avoid if calling during another operation which uses cursor save/restore + proc get_size_cursorrestore {} { + if {[catch { + #some terminals (conemu on windows) scroll the viewport when we make a big move down like this - a move to 1 1 immediately after cursor_save doesn't seem to fix that. + #This issue also occurs when switching back from the alternate screen buffer - so perhaps that needs to be addressed elsewhere. + puts -nonewline [punk::ansi::cursor_off][punk::ansi::cursor_save_dec][punk::ansi::move 2000 2000] + lassign [get_cursor_pos_list] lines cols + puts -nonewline [punk::ansi::cursor_restore][punk::console::cursor_on];flush stdout + set result [list columns $cols rows $lines] + } errM]} { + puts -nonewline [punk::ansi::cursor_restore_dec] + puts -nonewline [punk::ansi::cursor_on] + error "$errM" + } else { + return $result + } + } + proc get_dimensions {{inoutchannels {stdin stdout}}} { + lassign [get_size $inoutchannels] _c cols _l lines + return "${cols}x${lines}" + } + #the (xterm?) CSI 18t query is supported by *some* terminals + proc get_xterm_size {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[8;([0-9]+;[0-9]+)t)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[18t" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + lassign [split $payload {;}] rows cols + return [list columns $cols rows $rows] + } + proc get_mode_line_wrap {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[\?7;([0-9]+)\$y)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[?7\$p" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + + #Terminals generally default to LNM being reset (off) ie enter key sends a lone + #Terminals tested on windows either don't respond to this query, or respond with 0 (meaning mode not understood) + #I presume from this that almost nobody is using LNM 1 (which sends both and ) + proc get_mode_LNM {{inoutchannels {stdin stdout}}} { + set capturingregex {(.*)(\x1b\[\?20;([0-9]+)\$y)$} ;#must capture prefix,entire-response,response-payload + set request "\x1b\[?20\$p" + set payload [punk::console::internal::get_ansi_response_payload $request $capturingregex $inoutchannels] + return $payload + } + + #terminals lie. This should be a reasonable (albeit relatively slow) test of actual width - but some terminals seem to miscalculate. + #todo - a visual interactive test/questionnaire to ask user if things are lining up or if the terminal is telling fibs about cursor position. + #todo - determine if these anomalies are independent of font + #punk::ansi should be able to glean widths from unicode data files - but this may be incomplete - todo - compare with what terminal actually does. + proc test_char_width {char_or_string {emit 0}} { + #return 1 + #JMN + #puts stderr "cwtest" + variable ansi_available + if {!$ansi_available} { + puts stderr "No ansi - cannot test char_width of '$char_or_string' returning [string length $char_or_string]" + return [string length $char_or_string] + } + + if {!$emit} { + puts -nonewline stdout \033\[2K\033\[1G ;#2K erase line 1G cursor at col1 + } + set response "" + if {[catch { + set response [punk::console::get_cursor_pos] + } errM]} { + puts stderr "Cannot test_char_width for '[punk::ansi::ansistring VIEW $char_or_string]' - may be no console? Error message from get_cursor_pos: $errM" + return + } + lassign [split $response ";"] _row1 col1 + if {![string length $response] || ![string is integer -strict $col1]} { + puts stderr "test_char_width Could not interpret response from get_cursor_pos for initial cursor pos. Response: '[punk::ansi::ansistring VIEW $response]'" + flush stderr + return + } + + puts -nonewline stdout $char_or_string + set response [punk::console::get_cursor_pos] + lassign [split $response ";"] _row2 col2 + if {![string is integer -strict $col2]} { + puts stderr "test_char_width could not interpret response from get_cursor_pos for post-emit cursor pos. Response:'[punk::ansi::ansistring VIEW $response]'" + flush stderr + return + } + + if {!$emit} { + puts -nonewline stdout \033\[2K\033\[1G + } + flush stdout;#if we don't flush - a subsequent stderr write could move the cursor to a newline and interfere with our 2K1G erasure and cursor repositioning. + return [expr {$col2 - $col1}] + } + + #todo! - improve ideally we want to use VT sequences to determine - and make a separate utility for testing via systemcalls/os api + proc test_can_ansi {} { + #don't set ansi_avaliable here - we want to be able to change things, retest etc. + if {"windows" eq "$::tcl_platform(platform)"} { + if {[package provide twapi] ne ""} { + set h_out [twapi::get_console_handle stdout] + set existing_mode [twapi::GetConsoleMode $h_out] + if {[expr {$existing_mode & 4}]} { + #virtual terminal processing happens to be enabled - so it's supported + return 1 + } + #output mode + #ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + + #try temporarily setting it - if we get an error - ansi not supported + if {[catch { + twapi::SetConsoleMode $h_out [expr {$existing_mode | 4}] + } errM]} { + return 0 + } + #restore + twapi::SetConsoleMode $h_out [expr {$existing_mode & ~4}] + return 1 + } else { + #todo - try a cursorpos query and read stdin to see if we got a response? + puts stderr "Unable to verify terminal ansi support - assuming modern default of true" + puts stderr "to force disable, use command: ansi off" + return 1 + } + } else { + return 1 + } + } + + #review + proc can_ansi {} { + variable ansi_available + if {!$ansi_available} { + return 0 + } + set ansi_available [test_can_ansi] + return [expr {$ansi_available}] + } + + namespace eval ansi { + proc cursor_on {} { + puts -nonewline stdout [punk::ansi::cursor_on] + } + proc cursor_off {} { + puts -nonewline stdout [punk::ansi::cursor_off] + } + } + namespace import ansi::cursor_on + namespace import ansi::cursor_off + + #review - the concept of using local mechanisms at all (ie apis) vs ansi is not necessarily something we want/need to support. + #For the system to be really useful if needs to operate in conditions where the terminal is remote + #This seems to be why windows console is deprecating various non-ansi api methods for interacting with the console. + namespace eval local { + proc titleset {windowtitle} { + if {"windows" eq $::tcl_platform(platform)} { + if {![catch {twapi::set_console_title $windowtitle} result]} { + return $windowtitle + } else { + error "punk::console::titleset failed to set title - try punk::console::ansi::titleset" + } + } else { + error "punk::console::titleset has no local mechanism to set the window title on this platform. try punk::console::ansi::titleset" + } + } + proc titleget {} { + if {"windows" eq $::tcl_platform(platform)} { + if {![catch {twapi::get_console_title} result]} { + return $result + } else { + error "punk::console::titleset failed to set title - ensure twapi is available" + } + } else { + #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title + # won't work on all platforms/terminals - but may be worth implementing + error "punk::console::titleget has no local mechanism to get the window title on this platform." + } + } + } + + namespace eval ansi { + proc titleset {windowtitle} { + puts -nonewline stdout [punk::ansi::titleset $windowtitle] + } + } + #namespace import ansi::titleset + proc titleset {windowtitle} { + variable ansi_wanted + if { $ansi_wanted <= 0} { + punk::console::local::titleset $windowtitle + } else { + tailcall ansi::titleset $windowtitle + } + } + #no known pure-ansi solution + proc titleget {} { + return [local::titleget] + } + + proc infocmp {} { + set cmd1 [auto_execok infocmp] + if {[string length $cmd1]} { + puts stderr "" + return [exec {*}$cmd1] + } else { + puts stderr "infocmp doesn't seem to be present" + if {$::tcl_platform(platform) eq "FreeBSD"} { + puts stderr "For FreeBSD - install ncurses to get infocmp and related binaries and also install terminfo-db" + } + set tcmd [auto_execok tput] + if {[string length $tcmd]} { + puts stderr "tput seems to be available. Try something like: tput -S - (freebsd)" + } + #todo - what? can tput query all caps? OS differences? + } + } + + + #todo - compare speed with get_cursor_pos - work out why the big difference + proc test_cursor_pos {} { + if {!$::punk::console::is_raw} { + set was_raw 0 + enableRaw + } else { + set was_raw 1 + } + puts -nonewline stdout \033\[6n ;flush stdout + fconfigure stdin -blocking 0 + set info [read stdin 20] ;# + after 1 + if {[string first "R" $info] <=0} { + append info [read stdin 20] + } + if {!$was_raw} { + disableRaw + } + set data [string range [string trim $info] 2 end-1] + return [split $data ";"] + } + + #channel? + namespace eval ansi { + proc move {row col} { + puts -nonewline stdout [punk::ansi::move $row $col] + } + proc move_forward {n} { + puts -nonewline stdout [punk::ansi::move_forward $n] + } + proc move_back {n} { + puts -nonewline stdout [punk::ansi::move_back $n] + } + proc move_up {n} { + puts -nonewline stdout [punk::ansi::move_up $n] + } + proc move_down {n} { + puts -nonewline stdout [punk::ansi::move_down $n] + } + proc move_column {col} { + puts -nonewline stdout [punk::ansi::move_column $col] + } + proc move_row {row} { + puts -nonewline stdout [punk::ansi::move_row $col] + } + proc move_emit {row col data args} { + puts -nonewline stdout [punk::ansi::move_emit $row $col $data {*}$args] + } + proc move_emit_return {row col data args} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + set out "" + append out [punk::ansi::move_emit $row $col $data {*}$args] + if {!$is_in_raw} { + incr orig_row -1 + } + move $orig_row $orig_col + } + proc scroll_up {n} { + puts -nonewline stdout [punk::ansi::scroll_up $n] + } + proc scroll_down {n} { + puts -nonewline stdout [punk::ansi::scroll_down $n] + } + proc enable_alt_screen {} { + puts -nonewline stdout [punk::ansi::enable_alt_screen] + } + proc disable_alt_screen {} { + puts -nonewline stdout [punk::ansi::disable_alt_screen] + } + + #review - worth the extra microseconds to inline? might be if used in for example prompt on every keypress. + #caller should build as much as possible using the punk::ansi versions to avoid extra puts calls + proc cursor_save {} { + #*** !doctools + #[call [fun cursor_save]] + puts -nonewline \x1b\[s + } + proc cursor_restore {} { + #*** !doctools + #[call [fun cursor_restore]] + puts -nonewline \x1b\[u + } + #DEC equivalents of cursor_save/cursor_restore - perhaps more widely supported? + proc cursor_save_dec {} { + #*** !doctools + #[call [fun cursor_save_dec]] + puts -nonewline \x1b7 + } + proc cursor_restore_dec {} { + #*** !doctools + #[call [fun cursor_restore_dec]] + puts -nonewline \x1b8 + } + + proc insert_spaces {count} { + puts -nonewline stdout \x1b\[${count}@ + } + proc delete_characters {count} { + puts -nonewline \x1b\[${count}P + } + proc erase_characters {count} { + puts -nonewline \x1b\[${count}X + } + proc insert_lines {count} { + puts -nonewline \x1b\[${count}L + } + proc delete_lines {count} { + puts -nonewline \x1b\[${count}M + } + } + namespace import ansi::move + namespace import ansi::move_emit + namespace import ansi::move_forward + namespace import ansi::move_back + namespace import ansi::move_up + namespace import ansi::move_down + namespace import ansi::move_column + namespace import ansi::move_row + namespace import ansi::cursor_save + namespace import ansi::cursor_restore + namespace import ansi::cursor_save_dec + namespace import ansi::cursor_restore_dec + namespace import ansi::scroll_up + namespace import ansi::scroll_down + namespace import ansi::enable_alt_screen + namespace import ansi::disable_alt_screen + namespace import ansi::insert_spaces + namespace import ansi::delete_characters + namespace import ansi::erase_characters + namespace import ansi::insert_lines + namespace import ansi::delete_lines + + interp alias {} smcup {} ::punk::console::enable_alt_screen + interp alias {} rmcup {} ::punk::console::disable_alt_screen + + #experimental + proc rhs_prompt {col text} { + package require textblock + lassign [textblock::size $text] _w tw _h th + if {$th > 1} { + #move up first.. need to know current line? + } + #set blanks [string repeat " " [expr {$col + $tw}]] + #puts -nonewline [punk::ansi::erase_eol]$blanks;move_emit_return this $col $text + #puts -nonewline [move_emit_return this $col [punk::ansi::insert_spaces 150]$text] + cursor_save_dec + #move_emit_return this $col [punk::ansi::move_forward 50][punk::ansi::insert_spaces 150][punk::ansi::move_back 50][punk::ansi::move_forward $col]$text + #puts -nonewline [punk::ansi::insert_spaces 150][punk::ansi::move_column $col]$text + puts -nonewline [punk::ansi::erase_eol][punk::ansi::move_column $col]$text + cursor_restore + } + proc move_emit_return {row col data args} { + #todo detect if in raw mode or not? + set is_in_raw 0 + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + + set commands "" + append commands [punk::ansi::move_emit $row $col $data] + foreach {row col data} $args { + append commands [punk::ansi::move_emit $row $col $data] + } + if {!$is_in_raw} { + incr orig_row -1 + } + append commands [punk::ansi::move $orig_row $orig_col] + puts -nonewline stdout $commands + return "" + } + #we can be (slightly?) faster and more efficient if we use the consoles cursor_save_dec command - but each savecursor overrides any previous one. + #leave cursor_off/cursor_on to caller who can wrap more efficiently.. + proc cursorsave_move_emit_return {row col data args} { + set commands "" + append commands [punk::ansi::cursor_save_dec] + append commands [punk::ansi::move_emit $row $col $data] + foreach {row col data} $args { + append commands [punk::ansi::move_emit $row $col $data] + } + append commands [punk::ansi::cursor_restore_dec] + puts -nonewline stdout $commands; flush stdout + } + proc move_emitblock_return {row col textblock} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + + set commands "" + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + append commands [punk::ansi::move $orig_row $orig_col] + puts -nonewline $commands + return + } + proc cursorsave_move_emitblock_return {row col textblock} { + set commands "" + append commands [punk::ansi::cursor_save_dec] + foreach ln [split $textblock \n] { + append commands [punk::ansi::move_emit $row $col $ln] + incr row + } + append commands [punk::ansi::cursor_restore_dec] + puts -nonewline stdout $commands;flush stdout + return + } + proc move_call_return {row col script} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + move $row $col + uplevel 1 $script + move $orig_row $orig_col + } + + #this doesn't work - we would need an internal virtual screen structure to pick up cursor attributes from arbitrary locations? + # ncurses and its ilk may have something like that - but we specifically want to avoid curses libraries + proc pick {row col} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + set test "" + #set test [a green Yellow] + move_emit $row $col $test\0337 + puts -nonewline \0338\033\[${orig_row}\;${orig_col}H + } + proc pick_emit {row col data} { + set test "" + #set test [a green Purple] + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + move_emit $row $col $test\0337 + puts -nonewline \0338\033\[${orig_row}\;${orig_col}H$data + } + + # -- --- --- --- --- --- + namespace eval ansi { + proc test_decaln {} { + puts -nonewline stdout [punk::ansi::test_decaln] + } + } + namespace import ansi::test_decaln + + namespace eval clock { + + #map chars of chars "0" to "?"" ie 0x30 to x3f + variable fontmap1 { + 7C CE DE F6 E6 C6 7C 00 + 30 70 30 30 30 30 FC 00 + 78 CC 0C 38 60 CC FC 00 + 78 CC 0C 38 0C CC 78 00 + 1C 3C 6C CC FE 0C 1E 00 + FC C0 F8 0C 0C CC 78 00 + 38 60 C0 F8 CC CC 78 00 + FC CC 0C 18 30 30 30 00 + 78 CC CC 78 CC CC 78 00 + 78 CC CC 7C 0C 18 70 00 + 00 18 18 00 00 18 18 00 + 00 18 18 00 00 18 18 30 + 18 30 60 C0 60 30 18 00 + 00 00 7E 00 7E 00 00 00 + 60 30 18 0C 18 30 60 00 + 3C 66 0C 18 18 00 18 00 + } + #libungif extras + append fontmap1 { + 7c 82 9a aa aa 9e 7c 00 + 38 6c c6 c6 fe c6 c6 00 + fc c6 c6 fc c6 c6 fc 00 + } + + #https://github.com/Distrotech/libungif/blob/master/lib/gif_font.c + variable fontmap { + } + #ascii row 0x00 to 0x1F control chars + #(cp437 glyphs) + append fontmap { + 00 00 00 00 00 00 00 00 + 3c 42 a5 81 bd 42 3c 00 + 3c 7e db ff c3 7e 3c 00 + 00 ee fe fe 7c 38 10 00 + 10 38 7c fe 7c 38 10 00 + 00 3c 18 ff ff 08 18 00 + 10 38 7c fe fe 10 38 00 + 00 00 18 3c 18 00 00 00 + ff ff e7 c3 e7 ff ff ff + 00 3c 42 81 81 42 3c 00 + ff c3 bd 7e 7e bd c3 ff + 1f 07 0d 7c c6 c6 7c 00 + 00 7e c3 c3 7e 18 7e 18 + 04 06 07 04 04 fc f8 00 + 0c 0a 0d 0b f9 f9 1f 1f + 00 92 7c 44 c6 7c 92 00 + 00 00 60 78 7e 78 60 00 + 00 00 06 1e 7e 1e 06 00 + 18 7e 18 18 18 18 7e 18 + 66 66 66 66 66 00 66 00 + ff b6 76 36 36 36 36 00 + 7e c1 dc 22 22 1f 83 7e + 00 00 00 7e 7e 00 00 00 + 18 7e 18 18 7e 18 00 ff + 18 7e 18 18 18 18 18 00 + 18 18 18 18 18 7e 18 00 + 00 04 06 ff 06 04 00 00 + 00 20 60 ff 60 20 00 00 + 00 00 00 c0 c0 c0 ff 00 + 00 24 66 ff 66 24 00 00 + 00 00 10 38 7c fe 00 00 + 00 00 00 fe 7c 38 10 00 + } + #chars SP to "/" row 0x20 to 0x2f + append fontmap { + 00 00 00 00 00 00 00 00 + 30 30 30 30 30 00 30 00 + 66 66 00 00 00 00 00 00 + 6c 6c fe 6c fe 6c 6c 00 + 10 7c d2 7c 86 7c 10 00 + f0 96 fc 18 3e 72 de 00 + 30 48 30 78 ce cc 78 00 + 0c 0c 18 00 00 00 00 00 + 10 60 c0 c0 c0 60 10 00 + 10 0c 06 06 06 0c 10 00 + 00 54 38 fe 38 54 00 00 + 00 18 18 7e 18 18 00 00 + 00 00 00 00 00 00 18 70 + 00 00 00 7e 00 00 00 00 + 00 00 00 00 00 00 18 00 + 02 06 0c 18 30 60 c0 00 + } + #chars "0" to "?"" row 0x30 to 0x3f + append fontmap { + 7c c6 c6 c6 c6 c6 7c 00 + 18 38 78 18 18 18 3c 00 + 7c c6 06 0c 30 60 fe 00 + 7c c6 06 3c 06 c6 7c 00 + 0e 1e 36 66 fe 06 06 00 + fe c0 c0 fc 06 06 fc 00 + 7c c6 c0 fc c6 c6 7c 00 + fe 06 0c 18 30 60 60 00 + 7c c6 c6 7c c6 c6 7c 00 + 7c c6 c6 7e 06 c6 7c 00 + 00 30 00 00 00 30 00 00 + 00 30 00 00 00 30 20 00 + 00 1c 30 60 30 1c 00 00 + 00 00 7e 00 7e 00 00 00 + 00 70 18 0c 18 70 00 00 + 7c c6 0c 18 30 00 30 00 + } + #chars "@" to "O" row 0x40 to 0x4f + append fontmap { + 7c 82 9a aa aa 9e 7c 00 + 38 6c c6 c6 fe c6 c6 00 + fc c6 c6 fc c6 c6 fc 00 + 7c c6 c6 c0 c0 c6 7c 00 + f8 cc c6 c6 c6 cc f8 00 + fe c0 c0 fc c0 c0 fe 00 + fe c0 c0 fc c0 c0 c0 00 + 7c c6 c0 ce c6 c6 7e 00 + c6 c6 c6 fe c6 c6 c6 00 + 78 30 30 30 30 30 78 00 + 1e 06 06 06 c6 c6 7c 00 + c6 cc d8 f0 d8 cc c6 00 + c0 c0 c0 c0 c0 c0 fe 00 + c6 ee fe d6 c6 c6 c6 00 + c6 e6 f6 de ce c6 c6 00 + 7c c6 c6 c6 c6 c6 7c 00 + } + #chars "P" to "_" row 0x50 to 0x5f + append fontmap { + fc c6 c6 fc c0 c0 c0 00 + 7c c6 c6 c6 c6 c6 7c 06 + fc c6 c6 fc c6 c6 c6 00 + 78 cc 60 30 18 cc 78 00 + fc 30 30 30 30 30 30 00 + c6 c6 c6 c6 c6 c6 7c 00 + c6 c6 c6 c6 c6 6c 38 00 + c6 c6 c6 d6 fe ee c6 00 + c6 c6 6c 38 6c c6 c6 00 + c3 c3 66 3c 18 18 18 00 + fe 0c 18 30 60 c0 fe 00 + 3c 30 30 30 30 30 3c 00 + c0 60 30 18 0c 06 03 00 + 3c 0c 0c 0c 0c 0c 3c 00 + 00 38 6c c6 00 00 00 00 + 00 00 00 00 00 00 00 ff + } + #chars "`" to "o" row 0x60 to 0x6f + append fontmap { + 30 30 18 00 00 00 00 00 + 00 00 7c 06 7e c6 7e 00 + c0 c0 fc c6 c6 e6 dc 00 + 00 00 7c c6 c0 c0 7e 00 + 06 06 7e c6 c6 ce 76 00 + 00 00 7c c6 fe c0 7e 00 + 1e 30 7c 30 30 30 30 00 + 00 00 7e c6 ce 76 06 7c + c0 c0 fc c6 c6 c6 c6 00 + 18 00 38 18 18 18 3c 00 + 18 00 38 18 18 18 18 f0 + c0 c0 cc d8 f0 d8 cc 00 + 38 18 18 18 18 18 3c 00 + 00 00 cc fe d6 c6 c6 00 + 00 00 fc c6 c6 c6 c6 00 + 00 00 7c c6 c6 c6 7c 00 + } + #chars "p" to DEL row 0x70 to 0x7f + append fontmap { + 00 00 fc c6 c6 e6 dc c0 + 00 00 7e c6 c6 ce 76 06 + 00 00 6e 70 60 60 60 00 + 00 00 7c c0 7c 06 fc 00 + 30 30 7c 30 30 30 1c 00 + 00 00 c6 c6 c6 c6 7e 00 + 00 00 c6 c6 c6 6c 38 00 + 00 00 c6 c6 d6 fe 6c 00 + 00 00 c6 6c 38 6c c6 00 + 00 00 c6 c6 ce 76 06 7c + 00 00 fc 18 30 60 fc 00 + 0e 18 18 70 18 18 0e 00 + 18 18 18 00 18 18 18 00 + e0 30 30 1c 30 30 e0 00 + 00 00 70 9a 0e 00 00 00 + 00 00 18 3c 66 ff 00 00 + } + + proc bigstr {str row col} { + variable fontmap + #curses attr off reverse + #a noreverse + set reverse 0 + set output "" + set charno 0 + foreach char [split $str {}] { + binary scan $char c f + set index [expr {$f * 8}] + for {set line 0} {$line < 8} {incr line} { + set bitline 0x[lindex $fontmap [expr {$index + $line}]] + binary scan [binary format c $bitline] B8 charline + set cix 0 + foreach c [split $charline {}] { + if {$c} { + append output [punk::ansi::move_emit [expr {$row + $line}] [expr {$col + $charno * 8 + $cix}] "[a+ reverse] [a+ noreverse]"] + #curses attr on reverse + #curses move [expr $row + $line] [expr $col + $charno * 8 + $cix] + #curses puts " " + } + incr cix + } + } + incr charno + } + return $output + } + proc get_time {} { + overtype::left -width 70 "" [bigstr [clock format [clock seconds] -format %H:%M:%S] 1 1] + } + + + proc display1 {} { + #punk::console::clear + punk::console::move_call_return 20 20 {punk::console::clear_above} + flush stdout + punk::console::move_call_return 0 0 {puts stdout [bigstr [clock format [clock seconds] -format %H:%M:%S] 10 5]} + after 2000 {punk::console::clock::display} + } + proc display {} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + punk::console::move 20 20 + punk::console::clear_above + punk::console::move 0 0 + puts -nonewline [bigstr [clock format [clock seconds] -format %H:%M:%S] 10 5] + + punk::console::move $orig_row $orig_col + #after 2000 {punk::console::clock::display} + } + + proc displaystr {str} { + lassign [punk::console::get_cursor_pos_list] orig_row orig_col + punk::console::move 20 20 + punk::console::clear_above + punk::console::move 0 0 + puts -nonewline [bigstr $str 10 5] + + punk::console::move $orig_row $orig_col + } + + + } + + proc test {} { + set high_unicode_length [string length \U00010000] + set can_high_unicode 0 + set can_regex_high_unicode 0 + set can_terminal_report_dingbat_width 0 + set can_terminal_report_diacritic_width 0 + if {$high_unicode_length != 1} { + puts stderr "punk::console WARNING: no modern unicode support in this Tcl version. High unicode values not properly supported. (string length \\U00010000 : $high_unicode_length should be 1)" + } else { + set can_high_unicode 1 + set can_regex_high_unicode [string match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525] + if {!$can_regex_high_unicode} { + puts stderr "punk::console warning: TCL version cannot perform braced regex of high unicode" + } + } + set dingbat_heavy_plus_width [punk::console::test_char_width \U2795] ;#review - may be font dependent. We chose a wide dingbat as a glyph that is hopefully commonly renderable - and should display 2 wide. + #This will give a false report that terminal can't report width if the glyph (or replacement glyph) is actually being rendered 1 wide. + #we can't distinguish without user interaction? + if {$dingbat_heavy_plus_width == 2} { + set can_terminal_report_dingbat_width 1 + } else { + puts stderr "punk::console warning: terminal either not displaying wide unicode as wide, or unable to report width properly." + } + set diacritic_width [punk::console::test_char_width a\u0300] + if {$diacritic_width == 1} { + set can_terminal_report_diacritic_width 1 + } else { + puts stderr "punk::console warning: terminal unable to report diacritic width properly." + } + + if {$can_high_unicode && $can_regex_high_unicode && $can_terminal_report_dingbat_width && $can_terminal_report_diacritic_width} { + set result [list result ok] + } else { + set result [list result error] + } + return $result + } + #run the test and allow warnings to be emitted to stderr on package load. User should know the terminal and/or Tcl version are not optimal for unicode character work + #set testresult [test1] +} + + + +interp alias {} colour {} punk::console::colour +interp alias {} ansi {} punk::console::ansi +interp alias {} color {} punk::console::colour +interp alias {} a+ {} punk::console::code_a+ +interp alias {} a {} punk::console::code_a +interp alias {} a? {} punk::console::code_a? + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::console [namespace eval punk::console { + variable version + set version 0.1.1 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/docgen-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/docgen-0.1.0.tm new file mode 100644 index 00000000..f4d26342 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/docgen-0.1.0.tm @@ -0,0 +1,71 @@ + +# -*- tcl -* +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::docgen 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +package require punk::repo + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::docgen { + proc get_doctools_comments {fname} { + #does no validation of doctools commands + #existence of string match #\**!doctools is taken as evidence enough that the file has inline doctools - review + if {![file exists $fname]} { + error "get_doctools_comments file '$fname' not found" + } + set fd [open $fname r] + set data [read $fd] + close $fd + if {![string match "*#\**!doctools*" $data]} { + return + } + set data [string map [list \r\n \n] $data] + set in_doctools 0 + set doctools "" + foreach ln [split $data \n] { + set ln [string trim $ln] + if {$in_doctools && [string index $ln 0] != "#"} { + set in_doctools 0 + } elseif {[string range $ln 0 1] == "#*"} { + #todo - process doctools ordering hints in tail of line + set in_doctools 1 + } elseif {$in_doctools} { + append doctools [string range $ln 1 end] \n + } + } + return $doctools + } + #todo - proc autogen_doctools_comments {fname} {} + # - will probably need to use something like parsetcl - as we won't be able to reliably source in an interp without side-effects and use info body etc. + # - mechanism will be to autodocument namespaces, procs, methods where no #*** doctools indication present - but use existing doctools comments for that particular item if it is present. + + + +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::docgen [namespace eval punk::docgen { + variable pkg punk::docgen + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm new file mode 100644 index 00000000..538dc86f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm @@ -0,0 +1,1291 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::du 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::mix::base +package require struct::set + + +namespace eval punk::du { + variable has_twapi 0 +} +if {"windows" eq $::tcl_platform(platform)} { + if {![interp issafe]} { + package require zzzload + zzzload::pkg_require twapi + } + + if {[catch {package require twapi}]} { + puts stderr "Warning: punk::du - unable to load twapi. Disk operations may be much slower on windows without the twapi package" + } else { + set punk::du::has_twapi 1 + } + #package require punk::winpath +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::du { + + + proc dirlisting {folderpath args} { + set defaults [dict create\ + -glob *\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_glob [dict get $opts -glob] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + if {[lib::pathcharacterlen $folderpath] == 0} { + set folderpath [pwd] + } elseif {[file pathtype $folderpath] ne "absolute"} { + #file normalize relativelly slow - avoid in inner loops + #set folderpath [file normalize $folderpath] + + } + #run whichever of du_dirlisting_twapi, du_dirlisting_generic, du_dirlisting_unix has been activated + set dirinfo [active::du_dirlisting $folderpath {*}$opts] + } + + + + #Note that unix du seems to do depth-first - which makese sense when piping.. as output can be emitted as we go rather than requiring sort at end. + #breadth-first with sort can be quite fast .. but memory usage can easily get out of control + proc du { args } { + variable has_twapi + + + if 0 { + switch -exact [llength $args] { + 0 { + set dir . + set switch -k + } + 1 { + set dir $args + set switch -k + } + 2 { + set switch [lindex $args 0] + set dir [lindex $args 1] + } + default { + set msg "only one switch and one dir " + append msg "currently supported" + return -code error $msg + } + } + + set switch [string tolower $switch] + + set -b 1 + set -k 1024 + set -m [expr 1024*1024] + } + + + set opts $args + # flags in args are solos (or longopts --something=somethingelse) or sometimes pairopts + # we don't currently support mashopts (ie -xy vs separate -x -y) + + + #------------------------------------------------------- + # process any pairopts first and remove the pair + # (may also process some solo-opts) + + set opt_depth -1 + if {[set posn [lsearch $opts -d]] >= 0} { + set opt_depth [lindex $opts $posn+1] + set opts [lreplace $opts $posn $posn+1] + } + foreach o $opts { + if {[string match --max-depth=* $o]} { + set opt_depth [lindex [split $o =] 1] + if {![string is integer -strict $opt_depth]} { + error "--max-depth=n n must be an integer" + } + } + } + #------------------------------------------------------- + #only solos and longopts remain in the opts now + + + set lastarg [lindex $opts end] + if {[string length $lastarg] && (![string match -* $lastarg])} { + set dir $lastarg + set opts [lrange $opts 0 end-1] + } else { + set dir . + set opts $opts + } + foreach a $opts { + if {![string match -* $a]} { + error "unrecognized option '$a'" + } + } + + set -b 1 + set -k 1024 + set -m [expr 1024*1024] + set switch -k ;#default (same as unix) + set lc_opts [string tolower $opts] + + + + if {"-b" in $lc_opts} { + set switch -b + } elseif {"-k" in $lc_opts} { + set switch -k + } elseif {"-m" in $lc_opts} { + set switch -m + } + set opt_progress 0 + if {"--prog" in $lc_opts || "--progress" in $lc_opts} { + set opt_progress 1 + } + set opt_extra 0 + if {"--extra" in $lc_opts} { + set opt_extra 1 + } + set opt_vfs 0 + #This configures whether to enter a vfsmount point + #It will have no effect if cwd already with a vfs mount point - as then opt_vfs will be set to 1 automatically anyway. + if {"--vfs" in $lc_opts} { + set opt_vfs 1 + } + + + + set result [list] + + set dir_depths_remaining [list] + + set is_windows [expr {$::tcl_platform(platform) eq "windows"}] + set zero [expr {0}] + + # ## ### ### ### ### + # containerid and itemid + set folders [list] ;#we lookup string by index + lappend folders [file dirname $dir] + lappend folders $dir ;#itemindex 1 + # ## ### ### ### ### + if {![file isdirectory $dir]} { + lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] [file size $dir]] + #set ary($dir,bytes) [file size $dir] + set leveldircount 0 + } else { + lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] $zero] + set leveldircount 1 + } + set level [expr {0}] + set nextlevel [expr {1}] + #dir_depths list structure + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #0 1 2 3 4 5 + #i_depth i_containerid i_itemid i_item i_size i_index + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set i_depth [expr {0}] + set i_containerid [expr {1}] + set i_itemid [expr {2}] + set i_item [expr {3}] + set i_size [expr {4}] + set i_index [expr {5}] + + + set listlength [llength $dir_depths_remaining] + set diridx 0 + #this is a breadth-first algorithm + while {$leveldircount > 0} { + set leveldirs 0 + set levelfiles 0 + for {set i $diridx} {$i < $listlength} {incr i} { + #lassign [lindex $dir_depths_remaining $i] _d containeridx folderidx itm bytecount + set folderidx [lindex $dir_depths_remaining $i $i_itemid] + set folderpath [lindex $folders $folderidx] + #puts stderr ->$folderpath + #if {$i >= 20} { + #return + #} + + #twapi supports gathering file sizes during directory contents traversal + #for dirlisting methods that return an empty list in filesizes whilst files has entries - we will need to populate it below + #e.g tcl glob based dirlisting doesn't support gathering file sizes at the same time + + set in_vfs 0 + if {[package provide vfs] ne ""} { + foreach vfsmount [vfs::filesystem info] { + if {[file pathtype $folderpath] ne "absolute"} { + set testpath [file normalize $folderpath] + } else { + set testpath $folderpath + } + + if {[punk::mix::base::lib::path_a_atorbelow_b $testpath $vfsmount]} { + set in_vfs 1 + #if already descended to or below a vfs mount point - set opt_vfs true + set opt_vfs 1 + break + } + } + } + + if {$in_vfs} { + set du_info [lib::du_dirlisting_tclvfs $folderpath] + } else { + #run the activated function (proc imported to active namespace and renamed) + set du_info [active::du_dirlisting $folderpath] + } + + + set dirs [dict get $du_info dirs] + set files [dict get $du_info files] + set filesizes [dict get $du_info filesizes] + set vfsmounts [dict get $du_info vfsmounts] + #puts "---> vfsmounts $vfsmounts " + if {$opt_vfs} { + foreach vm $vfsmounts { + #puts stderr "vm: $vm" + #check if vfs is mounted over a file or a dir + if {$vm in $files} { + puts stderr "vfs mounted over file $vm" + set mposn [lsearch $files $vm] + set files [lreplace $files $mposn $mposn] + if {[llength $filesizes]} { + set filesizes [lreplace $filesizes $mposn $mposn] + } + } + if {$vm ni $dirs} { + puts stderr "treating $vm as dir" + lappend dirs $vm + } + } + } + + + incr leveldirs [llength $dirs] + incr levelfiles [llength $files] + + #lappend dir_depths_remaining {*}[lmap d $dirs {::list $nextdepth [lib::du_lit $cont/$itm] $d $zero}] + #folderidx is parent index for new dirs + lappend dir_depths_remaining {*}[lib::du_new_eachdir $dirs $nextlevel $folderidx] + + #we don't need to sort files (unless we add an option such as -a to du (?)) + set bytecount [expr {0}] + + if {[llength $files] && ![llength $filesizes]} { + #listing mechanism didn't supply corresponding sizes + foreach filename $files { + #incr bytecount [file size [file join $folderpath $filename] + incr bytecount [file size $filename] + } + } else { + set filesizes [lsearch -all -inline -not $filesizes[unset filesizes] na] ;#only legal non-number is na + set bytecount [tcl::mathop::+ {*}$filesizes] + } + + + #we can safely assume initial count was zero + lset dir_depths_remaining $i $i_size $bytecount + #incr diridx + } + #puts stdout "level: $level dirs: $leveldirs" + if {$opt_extra} { + puts stdout "level: $level dircount: $leveldirs filecount: $levelfiles" + } + incr level ;#zero based + set nextlevel [expr {$level + 1}] + set leveldircount [expr {[llength $dir_depths_remaining] - $listlength }]; #current - previous - while loop terminates when zero + #puts "diridx: $diridx i: $i rem: [llength $dir_depths_remaining] listlenth:$listlength levldircount: $leveldircount" + set diridx $i + set listlength [llength $dir_depths_remaining] + } + #puts stdout ">>> loop done" + #flush stdout + #puts stdout $dir_depths_remaining + set dirs_as_encountered $dir_depths_remaining ;#index is in sync with 'folders' list + set dir_depths_longfirst $dirs_as_encountered + + #store the index before sorting + for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { + lset dir_depths_longfirst $i $i_index $i + } + set dir_depths_longfirst [lsort -integer -index 0 -decreasing $dir_depths_longfirst[set dir_depths_longfirst {}]] + + #store main index in the reducing list + set dir_depths_remaining $dir_depths_longfirst + for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} { + #stored index at position 3 + lset dir_depths_remaining $i $i_index $i + } + + #index 3 + #dir_depths_remaining -> dir_depths_longfirst -> dirs_as_encountered + + #puts stdout "initial dir_depths_remaining: $dir_depths_remaining" + + + #summing performance is not terrible but significant on large tree - the real time is for large trees in the main loop above + #update - on really large trees the reverse is true especiallyl now that twapi fixed the original speed issues.. todo - rework/simplify below - review natsort + # + #TODO - reconsider sorting by depth.. lreverse dirs_as_encountered should work.. + if {[llength $dir_depths_longfirst] > 1} { + set i 0 + foreach dd $dir_depths_longfirst { + lassign $dd d parentidx folderidx item bytecount + #set nm $cont/$item + set nm [lindex $folders $folderidx] + set dnext [expr {$d +1}] + set nextdepthposns [lsearch -all -integer -index 0 $dir_depths_remaining $dnext] + set nextdepthposns [lsort -integer -decreasing $nextdepthposns[set nextdepthposns {}]];#remove later elements first + foreach posn $nextdepthposns { + set id [lindex $dir_depths_remaining $posn $i_itemid] + set ndirname [lindex $folders $id] + #set ndirname $cont/$item + #set item [lindex $dir_depths_remaining $posn $i_item] + #set ndirname [lindex $ndir 1] + if {[string match $nm/* $ndirname]} { + #puts stdout "dir $nm adding subdir size $ndirname" + #puts stdout "incr $nm from $ary($nm,bytes) plus $ary($ndirname,bytes)" + incr bytecount [lindex $dir_depths_remaining $posn $i_size] + set dir_depths_remaining [lreplace $dir_depths_remaining[set dir_depths_remaining {}] $posn $posn] + } + } + lset dir_depths_longfirst $i $i_size $bytecount + set p [lsearch -index $i_index -integer $dir_depths_remaining $i] + lset dir_depths_remaining $p $i_size $bytecount + #set ary($nm,bytes) $bytecount + incr i + } + } + #set dir_depths_longfirst [lsort -index 1 -decreasing $dir_depths_longfirst] + # + + set retval [list] + #copy across the bytecounts + for {set i 0} {$i < [llength $dir_depths_longfirst]} {incr i} { + set posn [lindex $dir_depths_longfirst $i $i_index] + set bytes [lindex $dir_depths_longfirst $i $i_size] + lset dirs_as_encountered $posn $i_size $bytes + } + foreach dirinfo [lreverse $dirs_as_encountered] { + set id [lindex $dirinfo $i_itemid] + set depth [lindex $dirinfo $i_depth] + if {($opt_depth >= 0) && $depth > $opt_depth} { + continue + } + set path [lindex $folders $id] + #set path $cont/$item + set item [lindex $dirinfo $i_item] + set bytes [lindex $dirinfo $i_size] + set size [expr {$bytes / [set $switch]}] + lappend retval [list $size $path] + } + # copyright 2002 by The LIGO Laboratory + return $retval + } + namespace eval active { + variable functions [list du_dirlisting ""] + variable functions_known [dict create] + + #known functions from lib namespace + dict set functions_known du_dirlisting [list du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided] + + proc show_functions {} { + variable functions + variable functions_known + set msg "" + dict for {callname implementations} $functions_known { + append msg "callname: $callname" \n + foreach imp $implementations { + if {[dict get $functions $callname] eq $imp} { + append msg " $imp (active)" \n + } else { + append msg " $imp" \n + } + } + } + return $msg + } + proc set_active_function {callname implementation} { + variable functions + variable functions_known + if {$callname ni [dict keys $functions_known]} { + error "unknown function callname $callname" + } + if {$implementation ni [dict get $functions_known $callname]} { + error "unknown implementation $implementation for callname $callname" + } + dict set functions $callname $implementation + + catch {rename ::punk::du::active::$callname ""} + namespace eval ::punk::du::active [string map [list %imp% $implementation %call% $callname] { + namespace import ::punk::du::lib::%imp% + rename %imp% %call% + }] + + return $implementation + } + proc get_active_function {callname} { + variable functions + variable functions_known + if {$callname ni [dict keys $functions_known]} { + error "unknown function callname $callname known functions: [dict keys $functions_known]" + } + return [dict get $functions $callname] + } + + + #where we import & the appropriate du_listing.. function for the platform + } + namespace eval lib { + variable du_literal + variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api + #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this + + proc decode_win_attributes {bitmask} { + variable winfile_attributes + if {[dict exists $winfile_attributes $bitmask]} { + return [dict get $winfile_attributes $bitmask] + } else { + #list/dict shimmering? + return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + } + } + proc attributes_twapi {path {detail basic}} { + try { + set iterator [twapi::find_file_open $path -detail $detail] ;# -detail full only adds data to the altname field + if {[twapi::find_file_next $iterator iteminfo]} { + set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + set result [dict create -archive 0 -hidden 0 -longname $path -readonly 0 -shortname {} -system 0] + if {"hidden" in $attrinfo} { + dict set result -hidden 1 + } + if {"system" in $attrinfo} { + dict set result -system 1 + } + if {"readonly" in $attrinfo} { + dict set result -readonly 1 + } + dict set result -shortname [dict get $iteminfo altname] + dict set result -rawflags $attrinfo + set extras [list] + #foreach prop {ctime atime mtime size} { + # lappend extras $prop [dict get $iteminfo $prop] + #} + #dict set result -extras $extras + dict set result -raw $iteminfo + return $result + } else { + error "could not read attributes for $path" + } + } finally { + catch {twapi::find_file_close $iterator} + } + } + + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? + namespace export du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided + # get listing without using unix-tools (may not be installed on the windows system) + # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) + # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + proc du_dirlisting_twapi {folderpath args} { + set defaults [dict create\ + -glob *\ + -with_sizes 1\ + -with_times 1\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_glob [dict get $opts -glob] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_sizes [dict get $opts -with_sizes] + set ftypes [list f d l] + if {"$opt_with_sizes" in {0 1}} { + #don't use string is boolean - (f false vs f file!) + #only accept 0|1 + if {$opt_with_sizes} { + set sized_types $ftypes + } else { + set sized_types [list] + } + } else { + set sized_types $opt_with_sizes + } + if {[llength $sized_types]} { + foreach st $sized_types { + if {$st ni $ftypes} { + error "du_dirlisting_twapi unrecognized element in -with_sizes '$st'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_times [dict get $opts -with_times] + if {"$opt_with_times" in {0 1}} { + if {$opt_with_times} { + set timed_types $ftypes + } else { + set timed_types [list] + } + } else { + set timed_types $opt_with_times + } + if {[llength $timed_types]} { + foreach item $timed_types { + if {$item ni $ftypes} { + error "du_dirlisting_twapi unrecognised element in -with-times '$item'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + + set errors [dict create] + set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ + # return it so it can be stored and tried as an alternative for problem paths + #puts stderr ">>> glob: $opt_glob" + #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. + try { + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + + + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { + #looks like an ordinary no results for chosen glob + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + } + + + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here + + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath [file join $parent $fixedtail] + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + + } + + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + + + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + + } + } + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] + + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + #recheck glob + #review! + if {![string match $opt_glob $nm]} { + continue + } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #puts stderr "$iteminfo" + #puts stderr "$nm -> [dict get $iteminfo attrs] -> $attrinfo" + set ftype "" + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + if {"hidden" in $attrinfo} { + lappend flaggedhidden $fullname + } + if {"system" in $attrinfo} { + lappend flaggedsystem $fullname + } + if {"readonly" in $attrinfo} { + lappend flaggedreadonly $fullname + } + + #main classification + if {"reparse_point" in $attrinfo} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return + + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + + lappend links $fullname + set ftype "l" + } elseif {"directory" in $attrinfo} { + if {$nm in {. ..}} { + continue + } + lappend dirs $fullname + set ftype "d" + } else { + + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + } + set ftype "f" + } + if {$ftype in $sized_types} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$ftype in $timed_types} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + } + twapi::find_file_close $iterator + set vfsmounts [get_vfsmounts_in_folder $folderpath] + + set effective_opts $opts + dict set effective_opts -with_times $timed_types + dict set effective_opts -with_sizes $sized_types + + #also determine whether vfs. file system x is *much* faster than file attributes + #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder + return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts errors $errors] + } + proc get_vfsmounts_in_folder {folderpath} { + set vfsmounts [list] + if {![llength [package provide vfs]]} { + return [list] + } + set fpath [punk::objclone $folderpath] + set is_rel 0 + if {[file pathtype $fpath] ne "absolute"} { + set fpath [file normalize $fpath] + set is_rel 1 + } + set known_vfs_mounts [vfs::filesystem info] + foreach mount $known_vfs_mounts { + if {[punk::mix::base::lib::path_a_above_b $fpath $mount]} { + if {([llength [file split $mount]] - [llength [file split $fpath]]) == 1} { + #the mount is in this folder + if {$is_rel} { + lappend vfsmounts [file join $folderpath [file tail $mount]] + } else { + lappend vfsmounts $mount + } + } + } + } + return $vfsmounts + } + #work around the horrible tilde-expansion thing (not needed for tcl 9+) + proc file_join_one {base newtail} { + if {[string index $newtail 0] ne {~}} { + return [file join $base $newtail] + } + return [file join $base ./$newtail] + } + + + #this is the cross-platform pure-tcl version - which calls glob multiple times to make sure it gets everythign it needs and can ignore everything it needs to. + #These repeated calls to glob will be a killer for performance - especially on a network share or when walking a large directory structure + proc du_dirlisting_generic {folderpath args} { + set opts [dict create\ + -glob *\ + -with_sizes 0\ + -with_times 0\ + ] + set errors [dict create] + foreach {k v} $args { + switch -- $k { + -glob - -with_sizes - -with_times { + dict set opts $k $v + } + default { + error "du_dirlisting_generic unknown-option '$k'. Known-options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_glob [dict get $opts -glob] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_sizes [dict get $opts -with_sizes] + set ftypes [list f d l] + if {"$opt_with_sizes" in {0 1}} { + #don't use string is boolean (false vs f problem where f indicates file) + if {$opt_with_sizes} { + set sized_types $ftypes + } else { + set sized_types [list] + } + } else { + set sized_types $opt_with_sizes + } + if {[llength $sized_types]} { + foreach st $sized_types { + if {$st ni $ftypes} { + error "du_dirlisting_generic unrecognized element in -with_sizes '$st'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_times [dict get $opts -with_times] + if {"$opt_with_times" in {0 1}} { + if {$opt_with_times} { + set timed_types $ftypes + } else { + set timed_types [list] + } + } else { + set timed_types $opt_with_times + } + if {[llength $timed_types]} { + foreach item $timed_types { + if {$item ni $ftypes} { + error "du_dirlisting_generic unrecognised element in -with-times '$item'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + # The repeated globs are a source of slowness for this function. + #TODO - we could minimize the number of globs if we know we need to do a file stat and/or file attributes on each entry anyway + #For the case where we don't need times,sizes or other metadata - it is faster to do multiple globs + #This all makes this function complicated to gather the required data efficiently. + + #note platform differences between what is considered hidden make this tricky. + # on windows 'glob .*' will not return some hidden dot items but will return . .. and glob -types hidden .* will not return some dotted items + # glob -types hidden * on windows will not necessarily return all dot files/folders + # unix-like platforms seem to consider all dot files as hidden so processing is more straightforward + # we need to process * and .* in the same glob calls and remove duplicates + # if we do * and .* in separate iterations of this loop we lose the ability to filter duplicates easily + + #note - with this design, we can't glob for all except dotfiles - this is for cross-platform consistency and efficiency (Review). + #dotfiles aren't considered hidden on all platforms + #some sort of antiglob is a possible enhancement and more consistent with the view that leading dot should be treated as any other filename character in this context. + if {$opt_glob eq "*"} { + #Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink' + #set parent [lindex $folders $folderidx] + set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*] + #set hdirs {} + set dirs [glob -nocomplain -dir $folderpath -types d * .*] + + set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*] + #set hlinks {} + set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. struct::set difference will remove (?) + #set links [lsort -unique [concat $hlinks $links[unset links]]] + + set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} * .*] + #set hfiles {} + set files [glob -nocomplain -dir $folderpath -types f * .*] + #set files {} + } else { + set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} $opt_glob] + set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] + + set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} $opt_glob] + set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. struct::set difference will remove (?) + + set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} $opt_glob] + set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + } + + #note struct::set difference produces unordered result + #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) + #relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' + #remove links and . .. from directories, remove links from files + #struct::set will affect order: tcl vs critcl give different ordering! + set files [struct::set difference [concat $hfiles $files[unset files]] $links] + set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] + #set links [lsort -unique [concat $links $hlinks]] + + + #---- + set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links] + + + + if {"windows" eq $::tcl_platform(platform)} { + set flaggedhidden [concat $hdirs $hfiles $hlinks] + } else { + #unix dotted files/folders are not 'flagged' as such - it's a convention - so the client can use the same convention to decide if something is hidden + #this allows us to return less data - but more importantly - reserve flaggedhidden for systems where such a flag exists and avoid conflating the different concepts of what is hidden + set flaggedhidden {} + } + + set vfsmounts [get_vfsmounts_in_folder $folderpath] + + set effective_opts $opts + dict set effective_opts -with_times $timed_types + dict set effective_opts -with_sizes $sized_types + + return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors] + } + + proc du_dirlisting_tclvfs {folderpath args} { + set defaults [dict create\ + -glob *\ + -with_sizes 0\ + -with_times 0\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_glob [dict get $opts -glob] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_sizes [dict get $opts -with_sizes] + set ftypes [list f d l] + if {"$opt_with_sizes" in {0 1}} { + #don't use string is boolean (false vs f problem where f indicates file) + if {$opt_with_sizes} { + set sized_types $ftypes + } else { + set sized_types [list] + } + } else { + set sized_types $opt_with_sizes + } + if {[llength $sized_types]} { + foreach st $sized_types { + if {$st ni $ftypes} { + error "du_dirlisting_generic unrecognized element in -with_sizes '$st'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_times [dict get $opts -with_times] + if {"$opt_with_times" in {0 1}} { + if {$opt_with_times} { + set timed_types $ftypes + } else { + set timed_types [list] + } + } else { + set timed_types $opt_with_times + } + if {[llength $timed_types]} { + foreach item $timed_types { + if {$item ni $ftypes} { + error "du_dirlisting_generic unrecognised element in -with-times '$item'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + + set errors [dict create] + if {$opt_glob eq "*"} { + set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs + #review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? + set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove + set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + } else { + set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] + set links [glob -nocomplain -dir $folderpath -types l $opt_glob] + set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + } + #remove any links from our dirs and files collections + set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] + set files [struct::set difference $files[unset files] $links] + #nested vfs mount.. REVIEW - does anything need special handling? + set vfsmounts [get_vfsmounts_in_folder $folderpath] + + set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links] + + + set effective_opts $opts + dict set effective_opts -with_times $timed_types + dict set effective_opts -with_sizes $sized_types + + return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors] + } + + #we can halve the number of round trips on unix-like systems, where 'hidden' always corresponds to dotted files + proc du_dirlisting_unix {folderpath args} { + set defaults [dict create\ + -glob *\ + -with_sizes 0\ + -with_times 0\ + ] + set errors [dict create] + dict lappend errors $folderpath "metadata support incomplete - prefer du_dirlisting_generic" + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_glob [dict get $opts -glob] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_sizes [dict get $opts -with_sizes] + set ftypes [list f d l] + if {"$opt_with_sizes" in {0 1}} { + #don't use string is boolean (false vs f problem where f indicates file) + if {$opt_with_sizes} { + set sized_types $ftypes + } else { + set sized_types [list] + } + } else { + set sized_types $opt_with_sizes + } + if {[llength $sized_types]} { + foreach st $sized_types { + if {$st ni $ftypes} { + error "du_dirlisting_generic unrecognized element in -with_sizes '$st'" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_with_times [dict get $opts -with_times] + if {"$opt_with_times" in {0 1}} { + if {$opt_with_times} { + set timed_types $ftypes + } else { + set timed_types [list] + } + } else { + set timed_types $opt_with_times + } + if {[llength $timed_types]} { + foreach item $timed_types { + if {$item ni $ftypes} { + error "du_dirlisting_generic unrecognised element in -with-times '$item'" + } + } + } + + #this is not consistent with standard way * works for unix folders - but it is consistent with behaviour of this facility on windows + if {$opt_glob eq "*"} { + set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs + set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove + set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + } else { + set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] + set links [glob -nocomplain -dir $folderpath -types l $opt_glob] + set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + } + #remove any links from our dirs and files collections + set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]] + set files [struct::set difference $files[unset files] $links] + set vfsmounts [get_vfsmounts_in_folder $folderpath] + + set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links] + + set effective_opts $opts + dict set effective_opts -with_times $timed_types + dict set effective_opts -with_sizes $sized_types + + + return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + + #return fsizes,allsizes,alltimes metadata in same order as files,dirs,links lists - if specified in sized_types + proc du_get_metadata_lists {sized_types timed_types files dirs links} { + set meta_dict [dict create] + set meta_types [concat $sized_types $timed_types] + #known tcl stat keys 2023 - review + set empty_stat_dict [dict create atime {} ctime {} dev {} gid {} ino {} mode {} mtime {} nlink {} size {} type {} uid {}] + #make sure we call file stat only once per item + set statkeys [list] + if {[llength $meta_types]} { + foreach ft {f d l} lvar {files dirs links} { + if {"$ft" in $meta_types} { + foreach path [set $lvar] { + #caller may have read perm on the containing folder - but not on child item - so file stat could raise an error + if {![catch {file stat $path arrstat} errM]} { + dict set meta_dict $path [dict create shorttype $ft {*}[array get arrstat]] + } else { + dict lappend errors $path "file stat error: $errM" + dict set meta_dict $path [dict create shorttype $ft {*}$empty_stat_dict] + } + } + } + } + } + set fsizes [list] + set allsizes [dict create] + set alltimes [dict create] + #review birthtime field of stat? cross-platform differences ctime etc? + dict for {path pathinfo} $meta_dict { + set ft [dict get $pathinfo shorttype] + if {$ft in $sized_types} { + dict set allsizes $path [dict create bytes [dict get $pathinfo size]] + if {$ft eq "f"} { + #subst with na if empty? + lappend fsizes [dict get $pathinfo size] + } + } + if {$ft in $timed_types} { + dict set alltimes $path [dict create c [dict get $pathinfo ctime] a [dict get $pathinfo atime] m [dict get $pathinfo mtime]] + } + } + #todo - fix . The list lengths will presumably match but have empty values if failed to stat + if {"f" in $sized_types} { + if {[llength $fsizes] ne [llength $files]} { + dict lappend errors $folderpath "failed to retrieve all file sizes" + } + } + return [dict create fsizes $fsizes allsizes $allsizes alltimes $alltimes] + } + + + proc du_lit value { + variable du_literal + if {![info exists du_literal($value)]} { + set du_literal($value) $value + } + return $du_literal($value) + } + + #v1 + proc du_new_eachdirtail {dirtails depth parentfolderidx} { + set newlist {} + upvar folders folders + set parentpath [lindex $folders $parentfolderidx] + set newindex [llength $folders] + foreach dt $dirtails { + lappend folders [file join $parentpath [du_lit $dt]]; #store as a 'path' rather than a string (see tcl::unsupported::representation) + lappend newlist [::list $depth $parentfolderidx $newindex [du_lit $dt] [expr {0}]] + incr newindex + } + return $newlist + } + proc du_new_eachdir {dirpaths depth parentfolderidx} { + set newlist {} + upvar folders folders + set newindex [llength $folders] + foreach dp $dirpaths { + lappend folders $dp + #puts stdout "--->$dp" + lappend newlist [::list $depth $parentfolderidx $newindex [du_lit [file tail $dp]] [expr {0}]] + incr newindex + } + return $newlist + } + + #same implementation as punk::strlen + #get length of path which has internal rep of path - maintaining path/list rep without shimmering to string representation. + proc pathcharacterlen {pathrep} { + append str2 $pathrep {} + string length $str2 + } + #just an experiment + proc pathcharacterlen1 {pathrep} { + #This works - but is unnecessarily complex + set l 0 + set parts [file split $pathrep] + if {[llength $parts] < 2} { + return [string length [lindex $parts 0]] + } + foreach seg $parts { + incr l [string length $seg] + } + return [expr {$l + [llength $parts] -2}] + } + #slower - doesn't work for short paths like c:/ + proc pathcharacterlen2 {pathrep} { + return [tcl::mathop::+ {*}[lmap v [set plist [file split $pathrep]] {[string length $v]}] [llength $plist] -2] + } + + #Strip using lengths without examining path components + #without normalization is much faster + proc path_strip_alreadynormalized_prefixdepth {path prefix} { + set tail [lrange [file split $path] [llength [file split $prefix]] end] + if {[llength $tail]} { + return [file join {*}$tail] + } else { + return "" + } + } + + proc du_dirlisting_undecided {folderpath args} { + if {"windows" eq $::tcl_platform(platform)} { + #jmn disable twapi + #tailcall du_dirlisting_generic $folderpath {*}$args + + set loadstate [zzzload::pkg_require twapi] + if {$loadstate ni [list loading failed]} { + #either already loaded by zzload or ordinary package require + package require twapi ;#should be fast once twapi dll loaded in zzzload thread + set ::punk::du::has_twapi 1 + punk::du::active::set_active_function du_dirlisting du_dirlisting_twapi + tailcall du_dirlisting_twapi $folderpath {*}$args + } else { + if {$loadstate eq "failed"} { + puts stderr "punk::du defaulting to du_dirlisting_generic because twapi load failed" + punk::du::active::set_active_function du_dirlisting du_dirlisting_generic + } + tailcall du_dirlisting_generic $folderpath {*}$args + } + } else { + punk::du::active::set_active_function du_dirlisting du_dirlisting_unix + tailcall du_dirlisting_unix $folderpath {*}$args + } + } + + + } + package require natsort + #interp alias {} du {} .=args>* punk::du |> .=>1 natsort::sort -cols 1 |> list_as_lines * punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat words |> list_as_lines * punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat csv -outputformatoptions {\r\t\t\t} |> list_as_lines -- -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::encmime 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::encmime 0 0.1.0] +#[copyright "2024"] +#[titledesc {mime encodings related subset of tcllib mime}] [comment {-- Name section and table of contents description --}] +#[moddesc {mime encoding names and aliases}] [comment {-- Description at end of page heading --}] +#[require punk::encmime] +#[keywords module encodings] +#[description] +#[para] This is a workaround package to provide the mime encoding names used in tcllib's mime package - without additional dependencies +#[para]tcllib mime loads either Trf or tcl::memchan functions. punk::encmime needs to work in a context where tcllib may not yet be loaded/available, and even these few dependencies are too much. +#[para]MAINTENANCE NOTE: The data in this module needs to be checked against the latest tcllib mime package +#[para]taken from tcllib mime version: 1.7.2 in 2024 + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::encmime +#[subsection Concepts] +#[para] Where practical - the actual tcllib mime package should be used instead. +#[para]This set of encoding related functions is a snapshot of the data from the mime package - and may not be up to date. +#[para]This pseudo-package was created to minimize dependencies for punk::char and punk::overtype + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::encmime +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::encmime::class { + #*** !doctools + #[subsection {Namespace punk::encmime::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::encmime { + namespace export * + + variable encList { + ascii US-ASCII + big5 Big5 + cp1250 Windows-1250 + cp1251 Windows-1251 + cp1252 Windows-1252 + cp1253 Windows-1253 + cp1254 Windows-1254 + cp1255 Windows-1255 + cp1256 Windows-1256 + cp1257 Windows-1257 + cp1258 Windows-1258 + cp437 IBM437 + cp737 {} + cp775 IBM775 + cp850 IBM850 + cp852 IBM852 + cp855 IBM855 + cp857 IBM857 + cp860 IBM860 + cp861 IBM861 + cp862 IBM862 + cp863 IBM863 + cp864 IBM864 + cp865 IBM865 + cp866 IBM866 + cp869 IBM869 + cp874 {} + cp932 {} + cp936 GBK + cp949 {} + cp950 {} + dingbats {} + ebcdic {} + euc-cn EUC-CN + euc-jp EUC-JP + euc-kr EUC-KR + gb12345 GB12345 + gb1988 GB1988 + gb2312 GB2312 + iso2022 ISO-2022 + iso2022-jp ISO-2022-JP + iso2022-kr ISO-2022-KR + iso8859-1 ISO-8859-1 + iso8859-2 ISO-8859-2 + iso8859-3 ISO-8859-3 + iso8859-4 ISO-8859-4 + iso8859-5 ISO-8859-5 + iso8859-6 ISO-8859-6 + iso8859-7 ISO-8859-7 + iso8859-8 ISO-8859-8 + iso8859-9 ISO-8859-9 + iso8859-10 ISO-8859-10 + iso8859-13 ISO-8859-13 + iso8859-14 ISO-8859-14 + iso8859-15 ISO-8859-15 + iso8859-16 ISO-8859-16 + jis0201 JIS_X0201 + jis0208 JIS_C6226-1983 + jis0212 JIS_X0212-1990 + koi8-r KOI8-R + koi8-u KOI8-U + ksc5601 KS_C_5601-1987 + macCentEuro {} + macCroatian {} + macCyrillic {} + macDingbats {} + macGreek {} + macIceland {} + macJapan {} + macRoman {} + macRomania {} + macThai {} + macTurkish {} + macUkraine {} + shiftjis Shift_JIS + symbol {} + tis-620 TIS-620 + unicode {} + utf-8 UTF-8 + } + variable encodings + array set encodings $encList + variable reversemap + variable encAliasList { + ascii ANSI_X3.4-1968 + ascii iso-ir-6 + ascii ANSI_X3.4-1986 + ascii ISO_646.irv:1991 + ascii ASCII + ascii ISO646-US + ascii us + ascii IBM367 + ascii cp367 + cp437 cp437 + cp437 437 + cp775 cp775 + cp850 cp850 + cp850 850 + cp852 cp852 + cp852 852 + cp855 cp855 + cp855 855 + cp857 cp857 + cp857 857 + cp860 cp860 + cp860 860 + cp861 cp861 + cp861 861 + cp861 cp-is + cp862 cp862 + cp862 862 + cp863 cp863 + cp863 863 + cp864 cp864 + cp865 cp865 + cp865 865 + cp866 cp866 + cp866 866 + cp869 cp869 + cp869 869 + cp869 cp-gr + cp936 CP936 + cp936 MS936 + cp936 Windows-936 + iso8859-1 ISO_8859-1:1987 + iso8859-1 iso-ir-100 + iso8859-1 ISO_8859-1 + iso8859-1 latin1 + iso8859-1 l1 + iso8859-1 IBM819 + iso8859-1 CP819 + iso8859-2 ISO_8859-2:1987 + iso8859-2 iso-ir-101 + iso8859-2 ISO_8859-2 + iso8859-2 latin2 + iso8859-2 l2 + iso8859-3 ISO_8859-3:1988 + iso8859-3 iso-ir-109 + iso8859-3 ISO_8859-3 + iso8859-3 latin3 + iso8859-3 l3 + iso8859-4 ISO_8859-4:1988 + iso8859-4 iso-ir-110 + iso8859-4 ISO_8859-4 + iso8859-4 latin4 + iso8859-4 l4 + iso8859-5 ISO_8859-5:1988 + iso8859-5 iso-ir-144 + iso8859-5 ISO_8859-5 + iso8859-5 cyrillic + iso8859-6 ISO_8859-6:1987 + iso8859-6 iso-ir-127 + iso8859-6 ISO_8859-6 + iso8859-6 ECMA-114 + iso8859-6 ASMO-708 + iso8859-6 arabic + iso8859-7 ISO_8859-7:1987 + iso8859-7 iso-ir-126 + iso8859-7 ISO_8859-7 + iso8859-7 ELOT_928 + iso8859-7 ECMA-118 + iso8859-7 greek + iso8859-7 greek8 + iso8859-8 ISO_8859-8:1988 + iso8859-8 iso-ir-138 + iso8859-8 ISO_8859-8 + iso8859-8 hebrew + iso8859-9 ISO_8859-9:1989 + iso8859-9 iso-ir-148 + iso8859-9 ISO_8859-9 + iso8859-9 latin5 + iso8859-9 l5 + iso8859-10 iso-ir-157 + iso8859-10 l6 + iso8859-10 ISO_8859-10:1992 + iso8859-10 latin6 + iso8859-14 iso-ir-199 + iso8859-14 ISO_8859-14:1998 + iso8859-14 ISO_8859-14 + iso8859-14 latin8 + iso8859-14 iso-celtic + iso8859-14 l8 + iso8859-15 ISO_8859-15 + iso8859-15 Latin-9 + iso8859-16 iso-ir-226 + iso8859-16 ISO_8859-16:2001 + iso8859-16 ISO_8859-16 + iso8859-16 latin10 + iso8859-16 l10 + jis0201 X0201 + jis0208 iso-ir-87 + jis0208 x0208 + jis0208 JIS_X0208-1983 + jis0212 x0212 + jis0212 iso-ir-159 + ksc5601 iso-ir-149 + ksc5601 KS_C_5601-1989 + ksc5601 KSC5601 + ksc5601 korean + shiftjis MS_Kanji + utf-8 UTF8 + } + + #*** !doctools + #[subsection {Namespace punk::encmime}] + #[para] Core API functions for punk::encmime + #[list_begin definitions] + + # ::mime::mapencoding -- + # + # mime::mapencodings maps tcl encodings onto the proper names for their + # MIME charset type. This is only done for encodings whose charset types + # were known. The remaining encodings return {} for now. + # + # Arguments: + # enc The tcl encoding to map. + # + # Results: + # Returns the MIME charset type for the specified tcl encoding, or {} + # if none is known. + proc mapencoding {enc} { + #*** !doctools + #[call mapencoding [arg enc]] + #[para]maps tcl encodings onto the proper names for their MIME charset type. + #[para]This is only done for encodings whose charset types were known. + #[para]The remaining encodings return {} for now. + #[para]NOTE: consider using tcllib's mime::mapencoding instead if mime package available + + variable encodings + if {[info exists encodings($enc)]} { + return $encodings($enc) + } + return {} + } + + proc reversemapencoding {mimeType} { + #*** !doctools + #[call reversemapencoding [arg mimeType]] + #[para]mime::reversemapencodings maps MIME charset types onto tcl encoding names. + #[para]Returns the tcl encoding name for the specified mime charset, or {} if none is known + #[para] Arguments: + # [list_begin arguments] + # [arg_def string mimeType] The MIME charset to convert into a tcl encoding type. + # [list_end] + #[para]NOTE: consider using tcllib's mime::reversemapencoding instead if mime package available + + variable reversemap + + set lmimeType [string tolower $mimeType] + if {[info exists reversemap($lmimeType)]} { + return $reversemap($lmimeType) + } + return {} + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::encmime ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +::apply {{} { + variable encList + variable encAliasList + variable reversemap + foreach {enc mimeType} $encList { + if {$mimeType eq {}} continue + set reversemap([string tolower $mimeType]) $enc + } + foreach {enc mimeType} $encAliasList { + set reversemap([string tolower $mimeType]) $enc + } + # Drop the helper variables + unset encList encAliasList + +} ::punk::encmime} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::encmime::lib { + namespace export * + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace punk::encmime::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::encmime::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval punk::encmime::system { + #*** !doctools + #[subsection {Namespace punk::encmime::system}] + #[para] Internal functions that are not part of the API + + + +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::encmime [namespace eval punk::encmime { + variable pkg punk::encmime + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/fileline-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/fileline-0.1.0.tm new file mode 100644 index 00000000..7e1ee14c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/fileline-0.1.0.tm @@ -0,0 +1,1731 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::fileline 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::fileline 0 0.1.0] +#[copyright "2024"] +#[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] +#[require punk::fileline] +#[keywords module text parse file encoding BOM] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) +#[para]This is important for certain text files where examining the number of chars/bytes is important +#[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. +#[para]This chunk-size counting will depend on the character encoding. +#[para]Despite including the word 'file', the library doesn't necessarily deal with reading/writing to the filesystem - +#[para]The raw data can be supplied as a string, or loaded from a file using punk::fileline::get_textinfo -file +#[subsection Concepts] +#[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. +#[example_begin] +# package require punk::fileline +# package require fileutil +# set rawdata [lb]fileutil::cat data.txt -translation binary[rb] +# punk::fileline::class::textinfo create obj_data $rawdata +# puts stdout [lb]obj_data linecount[rb] +#[example_end] +#[subsection Notes] +#[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. +#[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. +#[para]No support for lone carriage-returns being interpreted as line-endings. +#[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages needed by punk::fileline +#[list_begin itemized] + + package require Tcl 8.6- + package require punk::args + #*** !doctools + #[item] [package {Tcl 8.6-}] + #[item] [package {punk::args}] + + + # #package require frobz + # #*** !doctools + # #[item] [package {frobz}] + +#*** !doctools +#[list_end] [comment {- end dependencies list -}] + +#*** !doctools +#[subsection {optional dependencies}] +#[para] packages that add functionality but aren't strictly required +#[list_begin itemized] + + #*** !doctools + #[item] [package {punk::ansi}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] + #[item] [package {punk::char}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] + #[item] [package {overtype}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] + + +#*** !doctools +#[list_end] [comment {- end optional dependencies list -}] + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::fileline::class { + namespace export * + #*** !doctools + #[subsection {Namespace punk::fileline::class}] + #[para] class definitions + if {[info commands [namespace current]::textinfo] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + + #uses zero based indexing. Caller can add 1 for line numbers + oo::class create [namespace current]::textinfo { + #*** !doctools + #[enum] CLASS [class textinfo] + #[list_begin definitions] + # [para] [emph METHODS] + + variable o_chunk ;#current state + variable o_chunkop_store + variable o_lineop_store + + variable o_chunk_epoch + variable o_line_epoch + variable o_payloadlist + variable o_linemap + variable o_LF_C + variable o_CRLF_C + + + variable o_bom_id + variable o_bom + variable o_bom_map + + #review - for now we expect datachunk to be data without BOM and already encoded appropriately + #fileline::get_textinfo has support for interpreting BOM - but we currently have no way to do that for data not coming from a file + #refactor to allow that code to be called from here? + constructor {datachunk args} { + #*** !doctools + #[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] + #[para] Constructor for textinfo object which represents a chunk or all of a file + #[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: + #[example_begin] + # fconfigure $fd -translation binary + # set chunkdata [lb]read $fd[rb]] + #or + # set chunkdata [lb]fileutil::cat -translation binary[rb] + #[example_end] + #[para] when loading the data + namespace eval [namespace current] { + set nspath [namespace path] + foreach p [list ::punk::fileline ::punk::fileline::ansi] { + if {$p ni $nspath} { + lappend nspath $p + } + } + namespace path $nspath + } + + set o_bom_map [list\ + utf-8 \u00ef\u00bb\u00bf\ + utf-16be \u00fe\u00ff\ + utf-16le \u00ff\u00fe\ + utf-32be \u0000\u0000\u00fe\u00ff\ + utf-32le \u00ff\u00fe\u0000\u0000\ + utf-7 \u002b\u002f\u0076\ + utf-1 \u00f7\u0064\u004c\ + utf-ebcdic \u00dd\u0073\u0066\u0073\ + utf-scsu \u0003\u00fe\u00ff\ + utf-bocu-1 \u00fb\u00ee\u0028\ + utf-gb18030 \u0084\u0031\u0095\u0033\ + ] + set o_bom_id "" + set o_bom "" ;#review + + set o_chunk $datachunk + set o_line_epoch [list] + set o_chunk_epoch [list "fromchunkchange-at-[clock micros]"] + set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message + set defaults [dict create\ + -substitutionmap {}\ + -crlf_lf_placeholders $crlf_lf_placeholders\ + -userid ""\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "[self] constructor error: unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- + set opt_substitutionmap [dict get $opts -substitutionmap] ;#review - can be done by caller - or a loadable -policy + set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders] + set opt_userid [dict get $opts -userid] + # -- --- --- --- --- --- --- + + if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] !=1 || [string length [lindex $opt_crlf_lf_placeholders 1]] !=1} { + error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars" + } + lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C + if {[string first $o_LF_C $o_chunk] >=0} { + set decval [scan $o_LF_C %c] + if {$decval < 32 || $decval > 127} { + set char_desc "(decimal value $decval)" + } else { + set char_desc "'$o_LF_C' (decimal value $decval)" + } + error "textinfo::constructor error: rawfiledata already contains linefeed substitution character $char_desc specified as first element of -crlf_lf_placeholders" + } + if {[string first $o_CRLF_C $o_chunk] >=0} { + set decval [scan $o_CRLF_C %c] + if {$decval < 32 || $decval > 127} { + set char_desc "(decimal value $decval)" + } else { + set char_desc "'$o_CRLF_C' (decimal value $decval)" + } + error "textinfo::constructor error: rawfiledata already contains carriagereturn-linefeed substitution character $char_desc specified as second element of -crlf_lf_placeholders" + } + if {$o_LF_C eq $o_CRLF_C} { + puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings" + } + + my regenerate_lines + + } + + method set_bomid {bomid} { + if {$bomid ni [dict keys $o_bom_map]} { + error "Unrecognised bom-id $bomid. Known values: [dict keys $o_bom_map]" + } + set o_bom_id $bomid + set o_bom [dict get $o_bom_map $bomid] + } + method get_bomid {} { + return $o_bom_id + } + method get_bom {} { + return $o_bom + } + + method chunk {chunkstart chunkend} { + #*** !doctools + #[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] + #[para]Return a range of bytes from the underlying raw chunk data. + #[para] e.g The following retrieves the entire chunk + #[para] objName chunk 0 end + return [string range $o_chunk $chunkstart $chunkend] + } + method chunklen {} { + #*** !doctools + #[call class::textinfo [method chunklen]] + #[para] Number of bytes/characters in the raw data of the file + return [string length $o_chunk] + } + method chunk_boundary_display {chunkstart chunkend chunksize args} { + #*** !doctools + #[call class::textinfo [method chunk_boundary_display]] + #[para]Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend + #[para]Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour + set opts [dict create\ + -ansi $::punk::fileline::ansi::enabled\ + -offset 0\ + -displaybytes 200\ + -truncatedmark "..."\ + -completemark "---"\ + -moremark " + "\ + -continuemark " > "\ + -linemaxwidth 100\ + -linebase 0\ + -limit -1\ + -boundaries {}\ + -showconfig 0\ + -boundaryheader {Boundary %i% at %b%}\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + switch -- $k { + -ansi - -offset - -displaybytes - -truncatedmark - -completemark - -moremark - -continuemark - -linemaxwidth - -linebase - -limit - -boundaries - -showconfig - -boundaryheader { + dict set opts $k $v + } + default { + error "[self]::chunk_boundary error: unknown option '$k'. Known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- + set opt_ansi [dict get $opts -ansi] + set opt_offset [dict get $opts -offset] + set opt_displaybytes [dict get $opts -displaybytes] + set opt_tmark [dict get $opts -truncatedmark] + set opt_cmark [dict get $opts -completemark] + set opt_linemax [dict get $opts -linemaxwidth] + set opt_linebase [dict get $opts -linebase] + set opt_linebase [string map [list _ ""] $opt_linebase] + set opt_limit [dict get $opts -limit] ;#limit number of boundaries to display + set opt_boundaries [dict get $opts -boundaries] ;#use pre-calculated boundaries if supplied + set opt_showconfig [dict get $opts -showconfig] + set opt_boundaryheader [dict get $opts -boundaryheader] + # -- --- --- --- --- --- + package require overtype + # will require punk::char and punk::ansi + + if {"::punk::fileline::ansi::ansistrip" ne [info commands ::punk::fileline::ansi::ansistrip]} { + namespace eval ::punk::fileline::ansi { + namespace import ::punk::ansi::* + } + } + + #This mechanism for enabling/disabling ansi is a bit clumsy - prone to errors with regard to keeping in sync with any api changes in punk ansi + #It's done here to allow this to be used without the full set of punk modules and/or shell - REVIEW + + #risk of failing to reset on error + set pre_ansi_enabled $::punk::fileline::ansi::enabled + if {$opt_ansi} { + set ::punk::fileline::ansi::enabled 1 + } else { + set ::punk::fileline::ansi::enabled 0 + } + if {"::punk::fileline::ansistrip" ne [info commands ::punk::fileline::ansistrip]} { + proc ::punk::fileline::a {args} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::a {*}$args + } else { + return "" + } + } + proc ::punk::fileline::a+ {args} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::a+ {*}$args + } else { + return "" + } + } + proc ::punk::fileline::ansistrip {str} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::ansistrip $str + } else { + return $str + } + } + } + set maxline [lindex [my chunkrange_to_linerange $chunkend $chunkend] 0] + set minline [lindex [my chunkrange_to_linerange $chunkstart $chunkstart] 0] + + #suport simple end+-int (+-)start(+-)int to set linebase to line corresponding to chunkstart or chunkend + #also simple int+int and int-int - nothing more complicated (similar to Tcl lrange etc in that regard) + #commonly this will be something like -start or -end + if {![string is integer -strict $opt_linebase]} { + set sign "" + set errunrecognised "unrecognised -linebase value '$opt_linebase'. Expected positive or negative integer or -start -start-int -start+int -end -end-int -end+int or -eof (where leading - is optional but probably desirable) " + if {[string index $opt_linebase 0] eq "-"} { + set sign - + set tail [string range $opt_linebase 1 end] + } else { + set tail [string trimleft $opt_linebase +];#ignore + + } + #todo - switch -glob -- $tail + if {[string match eof* $tail]} { + set endmath [string range $tail 3 end] + #todo endmath? + if {$tail eq "eof"} { + set lastline [lindex [my chunkrange_to_linerange end end] 0] + set linebase ${sign}$lastline + } else { + error $errunrecognised + } + } elseif {[string match end* $tail]} { + set endmath [string range $tail 3 end] + if {[string length $endmath]} { + set op [string index $endmath 0] + if {$op in {+ -}} { + set operand [string range $endmath 1 end] + if {[string is integer -strict $operand]} { + if {$op eq "+"} { + set linebase [expr {$maxline + $operand}] + } else { + set linebase [expr {$maxline - $operand}] + } + } else { + error $errunrecognised + } + } else { + error $errunrecognised + } + } else { + set linebase $maxline + } + set linebase ${sign}$linebase + } elseif {[string match start* $tail]} { + set endmath [string range $tail 5 end] + if {[string length $endmath]} { + set op [string index $endmath 0] + if {$op in {+ -}} { + set operand [string range $endmath 1 end] + if {[string is integer -strict $operand]} { + if {$op eq "+"} { + set linebase [expr {$minline + $operand}] + } else { + set linebase [expr {$minline - $operand}] + } + } else { + error $errunrecognised + } + } else { + error $errunrecognised + } + } else { + set linebase $minline + } + set linebase ${sign}$linebase + } elseif {[string match *-* $tail]} { + set extras [lassign [split $tail -] int1 int2] + if {[llength $extras]} { + error $errunrecognised + } + if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { + error $errunrecognised + } + set linebase [expr {$int1 - $int2}] + set linebase ${sign}$linebase + } elseif {[string match *+* $tail]} { + set extras [lassign [split $tail +] int1 int2] + if {[llength $extras]} { + error $errunrecognised + } + if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { + error $errunrecognised + } + set linebase [expr {$int1 + $int2}] + set linebase ${sign}$linebase + } else { + error $errunrecognised + } + + } else { + set linebase $opt_linebase + } + + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + + if {![llength $opt_boundaries]} { + set binfo [lib::range_spans_chunk_boundaries $chunkstart $chunkend $chunksize -offset $opt_offset] + set boundaries [dict get $binfo boundaries] + } else { + set boundaries [list] + foreach b $opt_boundaries { + if {$chunkstart <= $b && $chunkend >= $b} { + lappend boundaries [expr {$b + $opt_offset}] + } + } + } + + + if {![llength $boundaries]} { + return "No boundaries found between $chunkstart and $chunkend for chunksize $chunksize (when offset $opt_offset)" + } + if {$opt_showconfig} { + set result "chunk range $chunkstart $chunkend line range $minline $maxline linebase $linebase limit $opt_limit\n" + } else { + set result "" + } + set pre_bytes [expr {$opt_displaybytes /2}] + set post_bytes $pre_bytes + set max_bytes [expr {[my chunklen] -1}] + if {$opt_limit > 0} { + set boundaries [lrange $boundaries[unset boundaries] 0 $opt_limit-1] + } + + set i 0 + foreach b $boundaries { + if {$opt_boundaryheader ne ""} { + set j [expr {$i+1}] + append result [string map [list %b% $b %i% $i %j% $j] $opt_boundaryheader] \n + } + set low [expr {max(($b - $pre_bytes),0)}] + set high [expr {min(($b + $post_bytes),$max_bytes)}] + + set lineinfolist [my chunkrange_to_lineinfolist $low $high -show_truncated 1] + set le_map [list \r\n \r \n ] + set result_list [list] + foreach lineinfo $lineinfolist { + set lineidx [dict get $lineinfo lineindex] + + set linenum [expr {$lineidx + $linebase}] + set s [dict get $lineinfo start] + set e [dict get $lineinfo end] + + set boundarymarker "" + set displayidx "" + set linenum_display $linenum + if {$s <= $b && $e >= $b} { + set idx [expr {$b - $s}] ;#index into whole position in whole line - not so useful if we're viewing a small section of a line + set char [string index [my line $lineidx] $idx] + set char_display [string map [list \r \n ] $char] + if {[dict get $lineinfo is_truncated]} { + set tside [dict get $lineinfo truncatedside] + set truncated [dict get $lineinfo truncated] + set tlen [string length $truncated] + if {"left" in $tside} { + set tleft [dict get $lineinfo truncatedleft] + set tleftlen [string length $tleft] + set displayidx [expr {$idx - $tleftlen}] + } elseif {"right" in $tside} { + set displayidx $idx + } + } else { + set displayidx $idx + } + set boundarymarker "'[a+ green bold]$char_display[a]'@$displayidx" + set linenum_display ${linenum_display},$idx + } + + set lhs_status $opt_cmark ;#default + set rhs_status $opt_cmark ;#default + if {[dict get $lineinfo is_truncated]} { + set line [dict get $lineinfo truncated] + set tside [dict get $lineinfo truncatedside] + if {"left" in $tside && "right" in $tside } { + set lhs_status $opt_tmark + set rhs_status $opt_tmark + } elseif {"left" in $tside} { + set lhs_status $opt_tmark + } elseif {"right" in $tside} { + set rhs_status $opt_tmark + } + + + } else { + set line [my line $lineidx] + } + if {$displayidx ne ""} { + set line [string replace $line $displayidx $displayidx [a+ White green bold]$char_display[a]] + } + set displayline [string map $le_map $line] + lappend result_list [list $linenum_display $boundarymarker $lhs_status $displayline $rhs_status] + } + set title_linenum "LNUM" + set linenums [lsearch -index 0 -all -inline -subindices $result_list *] + set markers [lsearch -index 1 -all -inline -subindices $result_list *] + set lines [lsearch -index 3 -all -inline -subindices $result_list *] + set title_marker "" + set title_line "Line" + #todo - use punk::char for unicode support of wide chars etc? + set widest_linenum [tcl::mathfunc::max {*}[lmap v [concat [list $title_linenum] $linenums] {string length $v}]] + set widest_marker [tcl::mathfunc::max {*}[lmap v [concat [list $title_marker] $markers] {string length [ansistrip $v]}]] + set widest_status [expr {max([string length $opt_cmark], [string length $opt_tmark])}] + set widest_line [tcl::mathfunc::max {*}[lmap v [concat [list $title_line] $lines] {string length $v}]] + foreach row $result_list { + lassign $row linenum marker lhs_status line rhs_status + append result [format " %-*s " $widest_linenum $linenum] + append result [format " %-*s " $widest_marker $marker] + append result [format " %-*s " $widest_status $lhs_status] + append result [format " %-*s " $widest_line $line] + append result [format " %-*s " $widest_status $rhs_status] \n + } + incr i + } + set ::punk::fileline::ansi::enabled $pre_ansi_enabled + return $result + } + method linecount {} { + #*** !doctools + #[call class::textinfo [method linecount]] + #[para] Number of lines in the raw data of the file, counted as per the policy in effect + return [llength $o_payloadlist] + } + + + method line {lineindex} { + #*** !doctools + #[call class::textinfo [method line] [arg lineindex]] + #[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata + #[para]A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) + #[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" + #[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending + + lassign [my numeric_linerange $lineindex 0] lineindex + + set le [dict get $o_linemap $lineindex le] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le] + return [lindex $o_payloadlist $lineindex]$le_chars + } + method chunk_find_glob {globsearch args} { + #todo - use linepayload_find_glob when -ignore_lineendings is 0 - but check truncations for 1st and last line + error "unimplemented" + } + method linepayload_find_glob {globsearch args} { + #*** !doctools + #[call class::textinfo [method linepayload_find_glob] [arg globsearch] [opt {option value...}]] + #[para]Return a lineinfolist (see [method lineinfo] and [method lineinfolist]) of lines where payload matches the [arg globsearch] string + #[para]To limit the returned results use the -limit n option - where -limit 0 means return all matches. + #[para]For example: [method linepayload_find_glob] "*test*" -limit 1 + #[para]The result is always a list of lineinfo dictionaries even if one item is returned + #[para] -limitfrom can be start|end + #[para]The order of results is always the order as they occur in the data - even if -limitfrom end is specified. + #[para]-limitfrom end means that only the last -limit items are returned + #[para]Note that as glob accepts [lb]chars[rb]] to mean match any character in the set given by chars, searching for literal square brackets should be done by escaping the bracket with a backslash + #[para]This is true even if only a single square bracket is being searched for. e.g {*[lb]file*} will not find the word file followed by a left square-bracket - even though the search didn't close the square brackets. + #[para]In the above case - the literal search should be {*\[lb]file*} + + set opts [dict create\ + -limit 0\ + -strategy 1\ + -start 0\ + -end end\ + -limitfrom start\ + ] + foreach {k v} $args { + switch -- $k { + -limit - -strategy - -start - -end - -limitfrom { + dict set opts $k $v + } + default { + error "linepayload_find_glob unknown option '$k'. Known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_limit [dict get $opts -limit] + if {![string is integer -strict $opt_limit] || $opt_limit < 0} { + error "linepayload_find_glob -limit must be positive integer" + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_strategy [dict get $opts -strategy] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_start [dict get $opts -start] + set opt_start [expr {$opt_start}] + if {$opt_start != 0} {error "-start unimplemented"} + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_end [dict get $opts -end] + set max_line_index [expr {[llength $o_payloadlist]-1}] + if {$opt_end eq "end"} { + set opt_end $max_line_index + } + #TODO + if {$opt_end < $max_line_index} {error "-end less than max_line_index unimplemented"} + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_limitfrom [dict get $opts -limitfrom] + #-limitfrom start|end only + #TODO + if {$opt_limitfrom ne "start"} {error "-limitfrom unimplemented"} + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + set lineinfolist [list] + + if {$opt_limit == 1} { + set idx [lsearch -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] + if {$idx >=0} { + set i [expr {$opt_start + $idx}] + lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] + } + } elseif {$opt_limit == 0} { + set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] + foreach irel $indices { + set i [expr {$opt_start + $irel}] + lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] + } + } else { + #todo - auto-strategy based on limit vs number of lines + if {$opt_strategy == 0} { + set posn 0 + for {set r 0} {$r < $opt_limit} {incr r} { + set n [lsearch [lrange $o_payloadlist $posn+$opt_start end] $globsearch] + if {$n >=0} { + set irel [expr {$posn + $n}] + set i [expr {$irel + $opt_start}] + lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] + set posn [expr {$irel+1}] + } + } + } else { + set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] + set limited [lrange $indices 0 $opt_limit-1] + foreach irel $limited { + set i [expr {$opt_start + $irel}] + lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] + } + } + } + return $lineinfolist + } + method linepayload {lineindex} { + #*** !doctools + #[call class::textinfo [method linepayload] [arg lineindex]] + #[para]Return the text of the line indicated by the zero-based lineindex + #[para]The line-ending is not returned in the data - but is still stored against this lineindex + #[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method + #[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used + #[para]To retrieve an entire line including line-ending use the [method line] method. + lassign [my numeric_linerange $lineindex 0] lineindex + return [lindex $o_payloadlist $lineindex] + } + method linepayloads {startindex endindex} { + #*** !doctools + #[call class::textinfo [method linepayloads] [arg startindex] [arg endindex]] + #[para]Return a list of just the payloads in the specified linindex range, with no metadata. + return [lrange $o_payloadlist $startindex $endindex] + } + method linemeta {lineindex} { + #*** !doctools + #[call class::textinfo [method linemeta] [arg lineindex]] + #[para]Return a dict of the metadata for the line indicated by the zero-based lineindex + #[para]Keys returned include + #[list_begin itemized] + #[item] le + #[para] A string representing the type of line-ending: crlf|lf|none + #[item] linelen + #[para] The number of characters/bytes in the whole line including line-ending if any + #[item] payloadlen + #[para] The number of character/bytes in the line excluding line-ending + #[item] start + #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins + #[item] end + #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends + #[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload + #[list_end] + lassign [my numeric_linerange $lineindex 0] lineindex + dict get $o_linemap $lineindex + } + method lineinfo {lineindex} { + #*** !doctools + #[call class::textinfo [method lineinfo] [arg lineindex]] + #[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex + #[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending. + #[para]The 'payload' value is the same as is returned from the [method linepayload] method. + lassign [my numeric_linerange $lineindex 0] lineindex ;#convert lineindex to canonical number e.g 1_000 -> 1000 end -> highest index + return [dict create lineindex $lineindex {*}[dict get $o_linemap $lineindex] payload [lindex $o_payloadlist $lineindex]] + } + method lineinfolist {startidx endidx} { + #*** !doctools + #[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]] + #[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx + lassign [my numeric_linerange $startidx $endidx] startidx endidx + set chunkstart [dict get $o_linemap $startidx start] + set chunkend [dict get $o_linemap $endidx end] + set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assertion - no need to view truncations as we've picked start and end of complete lines + #verify sanity + set l_start [lindex $line_list 0] + if {[set idx_start [dict get $l_start lineindex]] ne $startidx} { + error "lineinfolist first lineindex $idx_start doesn't match startidx $startidx" + } + set l_end [lindex $line_list end] + if {[set idx_end [dict get $l_end lineindex]] ne $endidx} { + error "lineinfolist last lineindex $idx_end doesn't match endidx $endidx" + } + return $line_list + } + + method linerange_to_chunkrange {startidx endidx} { + #*** !doctools + #[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] + + lassign [my numeric_linerange $startidx $endidx] startidx endidx + #inclusive range + return [list [dict get $o_linemap $startidx start] [dict get $o_linemap $endidx end]] + } + method linerange_to_chunk {startidx endidx} { + #*** !doctools + #[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] + set chunkrange [my linerange_to_chunkrange $startidx $endidx] + return [string range $o_chunk [lindex $chunkrange 0] [lindex $chunkrange 1]] + } + method lines {startidx endidx} { + #*** !doctools + #[call class::textinfo [method lines] [arg startidx] [arg endidx]] + lassign [my numeric_linerange $startidx $endidx] startidx endidx + set linelist [list] + set le_map [dict create lf \n crlf \r\n none ""] + for {set i $startidx} {$i <= $endidx} {incr i} { + lappend linelist "[lindex $o_payloadlist $i][dict get $le_map [dict get $o_linemap $i le]]" + } + return $linelist + } + method linepayloads {startidx endidx} { + #*** !doctools + #[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] + return [lrange $o_payloadlist $startidx $endidx] + } + method chunkrange_to_linerange {chunkstart chunkend} { + #*** !doctools + #[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + + set linestart -1 + for {set i 0} {$i < [llength $o_payloadlist]} {incr i} { + if {($chunkstart >= [dict get $o_linemap $i start]) && ($chunkstart <= [dict get $o_linemap $i end])} { + set linestart $i + break + } + } + if {$linestart == -1} { + error "Line with range in chunk spanning start index $chunkstart not found" + } + set lineend -1 + for {set i [expr {[llength $o_payloadlist] -1}]} {$i >=0} {incr i -1} { + if {($chunkend >= [dict get $o_linemap $i start]) && ($chunkend <= [dict get $o_linemap $i end])} { + set lineend $i + break + } + } + if {$lineend == -1} { + error "Line with range spanning end index $chunkend not found" + } + return [list $linestart $lineend] + } + method chunkrange_to_lineinfolist {chunkstart chunkend args} { + #*** !doctools + #[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] + #[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied + #[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. + #[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) + #[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. + + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + set defaults [dict create\ + -show_truncated 0\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "chunkrange_to_lines error: unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- + set opt_show_truncated [dict get $opts -show_truncated] + # -- --- --- --- --- --- --- --- + + set infolist [list] + set linerange [my chunkrange_to_linerange $chunkstart $chunkend] + lassign $linerange start_lineindex end_lineindex + + #if -show_truncated + #return extra keys for first and last items (which may be the same item if chunkrange is entirely within a line) + #add is_truncated 0|1 to all lines + #Even if the start/end line is not fully within the chunkrange ie truncated - the 'payload' key will contain the original untruncated data + ########################### + # first line may have payload tail truncated - or just linefeed, or even a split linefeed + ########################### + set first [dict create lineindex $start_lineindex {*}[dict get $o_linemap $start_lineindex] payload [lindex $o_payloadlist $start_lineindex]] + set start_info [dict get $o_linemap $start_lineindex] + + + if {$chunkstart > [dict get $start_info start]} { + dict set first is_truncated 1 + dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line + } else { + dict set first is_truncated 0 + } + + if {$opt_show_truncated} { + #line1 + if {$chunkstart > [dict get $start_info start]} { + #there is lhs truncation + set payload [lindex $o_payloadlist $start_lineindex] + set line_start [dict get $start_info start] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $start_info le]] + set payload_and_le "${payload}${le_chars}" + set split [expr {$chunkstart - $line_start}] + set truncated [string range $payload_and_le $split end] + set lhs [string range $payload_and_le 0 $split-1] + + dict set first truncated $truncated + dict set first truncatedleft $lhs + } + } + ########################### + + ########################### + # middle lines if any - no truncation + ########################### + #difference in indexes of 1 would only mean 2 items to return + set middle_list [list] + if {($end_lineindex - $start_lineindex) > 1} { + for {set i [expr {$start_lineindex +1}]} {$i <= [expr {$end_lineindex -1}] } {incr i} { + #lineindex is key into main list + lappend middle_list [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0] + } + } + ########################### + + ########################### + # tail line may have beginning or all of payload truncated - linefeed may be split if crlf + # may be same line as first line - in which case truncation at beginning as well + if {$end_lineindex == $start_lineindex} { + #same record + set end_info $start_info + + + if {$chunkend < [dict get $end_info end]} { + #there is rhs truncation + if {[dict get $first is_truncated]} { + dict set first truncatedside [list left right] + } else { + dict set first is_truncated 1 + dict set first truncatedside [list right] + } + } + + if {$opt_show_truncated} { + if {$chunkend < [dict get $end_info end]} { + #there is rhs truncation and we need to return the splits + #do rhs truncation - possibly in addition to existing lhs truncation + # ... + set payload [lindex $o_payloadlist $end_lineindex] + set line_start [dict get $end_info start] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]] + set payload_and_le "${payload}${le_chars}" + set split [expr {$chunkend - $line_start}] + set truncated [string range $payload_and_le 0 $split] + set rhs [string range $payload_and_le $split+1 end] + dict set first truncatedright $rhs + if {"left" ni [dict get $first truncatedside]} { + #rhs truncation only + puts "payload_and_le: $payload_and_le" + puts "LENGTH: [string length $payload_and_le]" + #--- + #--- + dict set first truncated $truncated + dict set first truncatedside [list right] + } else { + #truncated on both sides + set lhslen [string length [dict get $first truncatedleft]] + #re-truncate the truncation to reapply the original lhs truncation + set truncated [string range $truncated $lhslen end] + dict set first truncated $truncated + } + } + } + #no middle or last to append + lappend infolist $first + } else { + set last [dict create lineindex $end_lineindex {*}[dict get $o_linemap $end_lineindex] payload [lindex $o_payloadlist $end_lineindex]] + set end_info [dict get $o_linemap $end_lineindex] + + + if {$chunkend < [dict get $end_info end]} { + dict set last is_truncated 1 + dict set last truncatedside [list right] + } else { + dict set last is_truncated 0 + } + + if {$opt_show_truncated} { + if {$chunkend < [dict get $end_info end]} { + #there is rhs truncation - and last line in range is a different line to first one + set payload [lindex $o_payloadlist $end_lineindex] + set line_start [dict get $end_info start] + set line_end [dict get $end_info end] + set le [dict get $end_info le] + set le_size [dict get {lf 1 crlf 2 none 0} $le] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le] + set payload_and_le "${payload}${le_chars}" + + set split [expr {$chunkend - $line_start}] + set truncated [string range $payload_and_le 0 $split] + set rhs [string range $payload_and_le $split+1 end] + + dict set last truncated $truncated + dict set last truncatedright $rhs + #this has the effect that truncating the rhs by 1 can result in truncated being larger than original payload for crlf lines - as payload now sees the cr + #this is a bit unintuitive - but probably best reflects the reality. The truncated value is the truncated 'line' rather than the truncated 'payload' + } + } + + + lappend infolist $first + if {[llength $middle_list]} { + lappend infolist {*}$middle_list + } + lappend infolist $last + } + ########################### + #assertion all records have is_truncated key. + #assertion if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right + #assertion If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys. + return $infolist + } + + #need to check truncations so that any split \r\n is counted precisely todo + method chunk_le_counts {chunkstart chunkend} { + set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend -show_truncated 1] + set lf_count 0 + set crlf_count 0 + set none_count 0 + foreach d $infolines { + set le [dict get $d le] + if {$le eq "lf"} { + incr lf_count + } elseif {$le eq "crlf"} { + incr crlf_count + } else { + incr none_count + } + } + #even without split crlf - this can overcount by counting the lf or crlf in a line which had an ending not in the chunk range specified + + #check first and last infoline for truncations + #Also check if the truncation is directly between an crlf + #both an lhs split and an rhs split could land between cr and lf + #to be precise - we should presumably count the part within our chunk as either a none for cr or an lf + #This means a caller counting chunk by chunk using this method will sometimes get the wrong answer depending on where crlfs lie relative to their chosen chunk size + #This is presumably ok - as it should be a well known thing to watch out for. + #If we're only receiving chunk by chunk we can't reliably detect splits vs lone s in the data + #There are surely more efficient ways for a caller to count line-endings in the way that makes sense for them + #but we should makes things as easy as possible for users of this line/chunk structure anyway. + + set first [lindex $infolines 0] + if {[dict get $first is_truncated]} { + #could be the only line - and truncated at one or both ends. + #both a left and a right truncation could split a crlf + + } + set last [lindex $infolines end] + if {[dict get $first lineindex] != [dict get $last lineindex]} { + #only need to process last if it is a different line + #if so - then split can only be left side + + } + + + return [dict create lf $lf_count crlf $crlf_count unterminated $none_count warning line_ending_splits_unimplemented] + } + + #todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk + method append_chunk {rawchunk} { + error "sorry - unimplemented" + } + + method numeric_linerange {startidx endidx} { + #*** !doctools + #[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]] + #[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data + #[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices + return [my normalize_indices $startidx $endidx [expr {[dict size $o_linemap]-1}]] + } + method numeric_chunkrange {startidx endidx} { + #*** !doctools + #[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]] + #[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data + return [my normalize_indices $startidx $endidx [expr {[string length $o_chunk]-1}]] + } + method normalize_indices {startidx endidx max} { + #*** !doctools + #[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]] + #[para]A utility to convert some of the of Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max + #[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted + #[para]startidx higher than endidx is allowed + #[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max + set original_startidx $startidx + set original_endidx $endidx + set startidx [string map [list _ ""] $startidx] ;#don't barf on Tcl 8.7+ underscores in numbers - we can't just use expr because it will not handle end-x + set endidx [string map [list _ ""] $endidx] + if {![string is digit -strict "$startidx$endidx"]} { + foreach whichvar [list start end] { + upvar 0 ${whichvar}idx index + if {![string is digit -strict $index]} { + switch -glob -- $index { + end { + set index $max + } + "*-*" { + #end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions + lassign [split $index -] A B + if {$A eq "end"} { + set index [expr {$max - $B}] + } else { + set index [expr {$A - $B}] + } + } + "*+*" { + lassign [split $index +] A B + if {$A eq "end"} { + #review - this will just result in out of bounds error in final test - as desired + #By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all. + set index [expr {$max + $B}] + } else { + set index [expr {$A + $B}] + } + } + default { + #May be something like +2 or -0 which braced expr can hanle + #we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources. + if {[catch {expr {$index}} index]} { + #could be end+x - but we don't want out of bounds to be valid + #set it to something that the final bounds expr test can deal with + set index Inf + } + } + } + } + } + } + #Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices. + #show the supplied index and what it was mapped to in the error message. + if {$startidx < 0 || $startidx > $max} { + error "Bad start index '$original_startidx'. $startidx out of bounds 0 - $max" + } + if {$endidx < 0 || $endidx > $max} { + error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max (try $max or end)" + } + return [list $startidx $endidx] + } + + method regenerate_lines {args} { + #*** !doctools + #[call class::textinfo [method regenerate_lines]] + #[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex + #[para]This is called automatically by the Constructor during object creation + #[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered. + #[para]TODO - review whether such manual control will be necessary/desirable + + #we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none + + # first split on lf - then crlf. As we've replaced with single substution chars - the order doesn't matter. + set o_payloadlist [list] + set o_linemap [dict create] + set crlf_replace [list \r\n $o_CRLF_C \n $o_LF_C] + set normalised_data [string map $crlf_replace $o_chunk] + + set lf_lines [split $normalised_data $o_LF_C] + + set idx 0 + set lf_count 0 + set crlf_count 0 + set filedata_offset 0 + set i 0 + set imax [expr {[llength $lf_lines]-1}] + foreach lfln $lf_lines { + set crlf_parts [split $lfln $o_CRLF_C] + if {[llength $crlf_parts] <= 1} { + #no crlf + set payloadlen [string length $lfln] + set le_size 1 + set le lf + if {$i == $imax} { + #no more lf segments - and no crlfs + if {$payloadlen > 0} { + #last line in split has chars - therefore there was no trailing line-ending + set le_size 0 + set le none + } else { + #empty space after last line-ending + #not really a line - we get here from splitting on our lf-replacement char + #An editor might display this pseudo-line with a line number - but we won't treat it as one here + break + } + } + lappend o_payloadlist $lfln + set linelen [expr {$payloadlen + $le_size}] + #we include line-ending in byte count for a line. + dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] + incr filedata_offset $linelen + incr lf_count + incr idx + } else { + foreach crlfpart [lrange $crlf_parts 0 end-1] { + lappend o_payloadlist $crlfpart + set payloadlen [string length $crlfpart] + set linelen [expr {$payloadlen + 2}] + dict set o_linemap $idx [list le crlf linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] + incr filedata_offset $linelen + incr crlf_count + incr idx + } + set lfpart [lindex $crlf_parts end] + set payloadlen [string length $lfpart] + if {$i == $imax} { + #no more lf segments - but we did find crlf in last (or perhaps only) lf line + #last element in our split has no le + if {$payloadlen > 0} { + set le_size 0 + set le none + } else { + #set le_size 2 + #set le crlf + break + } + } else { + #more lf segments to come + set le_size 1 + set le lf + } + + lappend o_payloadlist $lfpart + set linelen [expr {$payloadlen + $le_size}] + dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] + incr filedata_offset $linelen + incr lf_count + incr idx + } + incr i + #incr filedata_offset ;#move up 1 so start entry for next line is greater than end entry for previous line + } + set le_count [expr {$lf_count + $crlf_count}] + if {$le_count != [llength $o_payloadlist]} { + puts stderr "fileline::class::textinfo warning. regenerate_lines lf_count: $lf_count + crlf_count: $crlf_count does not equal length of lines stored: [llength $o_payloadlist]" + } + + } + method regenerate_chunk {} { + #o_payloadlist + #o_linemap + set oldsize [string length $o_chunk] + set newchunk "" + dict for {idx lineinfo} $o_linemap { + set + + } + + return [list newsize [string length $newchunk] oldsize $oldsize] + } + + + #*** !doctools + #[list_end] + } + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::fileline { + namespace export * + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::fileline}] + #[para] Core API functions for punk::fileline + #[list_begin definitions] + + proc get_textinfo {args} { + #*** !doctools + #[call get_textinfo [opt {option value...}] [opt datachunk]] + #[para]Returns textinfo object instance representing data in string datachunk or if -file filename supplied - data loaded from a file + #[para]The encoding used is as specified in the -encoding option - or from the Byte Order Mark (bom) at the beginning of the data + #[para]For Tcl 8.6 - encodings such as utf-16le may not be available - so the bytes are swapped appropriately depending on the platform byteOrder and encoding 'unicode' is used. + #[para]encoding defaults to utf-8 if no -encoding specified and no BOM was found + #[para]Whether -encoding was specified or not - by default the BOM characters are not retained in the line-data + #[para]If -includebom 1 is specified - the bom will be retained in the stored chunk and the data for line 1, but will undergo the same encoding transformation as the rest of the data + #[para]The get_bomid method of the returned object will contain an identifier for any BOM encountered. + #[para] e.g utf-8,utf-16be, utf-16le, utf-32be, utf32-le, SCSU, BOCU-1,GB18030, UTF-EBCDIC, utf-1, utf-7 + #[para]If the encoding specified in the BOM isn't recognised by Tcl - the resulting data is likely to remain as the raw bytes of whatever encoding that is. + #[para]Currently only utf-8, utf-16* and utf-32* are properly supported even though the other BOMs are detected, reported via get_bomid, and stripped from the data. + #[para]GB18030 falls back to cp936/gbk (unless a gb18030 encoding has been installed). Use -encoding iso8859-1 if this isn't suitable and you need to do your own processing of the bytes. + + set argument_specification { + -file -default {} -type existingfile + -translation -default iso8859-1 + -encoding -default "\uFFFF" + -includebom -default 0 + *values -min 0 -max 1 + } + lassign [dict values [punk::args::get_dict $argument_specification $args]] opts values + # -- --- --- --- + set opt_file [dict get $opts -file] + set opt_translation [dict get $opts -translation] + set opt_encoding [dict get $opts -encoding] + set opt_includebom [dict get $opts -includebom] + # -- --- --- --- + + if {$opt_file ne ""} { + set filename $opt_file + set fd [open $filename r] + fconfigure $fd -translation binary -encoding $opt_translation;#should use translation binary to get actual line-endings - but we allow caller to override + #Always read encoding in binary - check for bom below and/or apply chosen opt_encoding + set rawchunk [read $fd] + close $fd + if {[llength $values]} { + puts stderr "Ignoring trailing argument [string length [lindex $values 0]] bytes. Not used when -file is specified" + } + } else { + set rawchunk [lindex $values 0] + } + set rawlen [string length $rawchunk] + #examine first 4 bytes for possible BOM + #big-endian BOMs + # ----------------------------------- + #EFBBBF - utf-8 reliabletxt + #FEFF - utf-16be reliabletxt + #FFFE - utf-16le reliabletxt + #0000FEFF - utf-32be reliabletxt + #FFFE0000 - utf-32le + #0000FFFE - utf-32be(2143) non-standard! (not supported) + #FEFF0000 - utf-32le(3412) non-standard! (not supported - will detect as utf-16be) + #2B2F76 - utf-7 (not supported) + #F7644C - utf-1 (not supported) + #DD736673 - UTF-EBCDIC (not supported) + #0EFEFF - SCSU (not supported) + #FBEE28 - BOCU-1 Binary Ordered Compression for Unicode (mime-compatible) - (not supported - fall back to utf-8) + #84319533 - GB18030 - Chinese gov standard (fall back to cp936 with warning if no encoding name) + # ----------------------------------- + + set first32 [string range $rawchunk 0 3] + #scan using capital H for big-endian order + set first32_be [binary scan $first32 H* maybe_bom] ;#we use H* instead of H8 for 8 nibbles (4 bytes) - because our first32 may contain less than 4 bytes - in which case we won't match + set bomid "" + set bomenc "" + set is_reliabletxt 0 ;#see http://reliabletxt.com - only utf-8 with bom, utf-16be, utf-16le, utf-32be supported as at 2024 + set startdata 0 + #todo switch -glob + if {[string match "efbbbf*" $maybe_bom]} { + set bomid utf-8 + set bomenc utf-8 + set is_reliabletxt 1 + set startdata 3 + } elseif {$maybe_bom eq "0000feff"} { + set bomid utf-32be + set bomenc utf-32be + set is_reliabletxt 1 + set startdata 4 + } elseif {$maybe_bom eq "fffe0000"} { + #Technically ambiguous - could be utf-16le bom followed by utf-16 null character (2 byte null) + puts stderr "WARNING - ambiguous BOM fffe0000 found. Treating as utf-32le - but could be utf-16le - consider manually setting -encoding or converting data to another encoding." + set bomid utf-32le + set bomenc utf-32le + set startdata 4 + } elseif {[string match "feff*" $maybe_bom]} { + set bomid utf-16be + set bomenc utf-16be + set is_reliabletxt 1 + set startdata 2 + } elseif {[string match "fffe*" $maybe_bom]} { + set bomid utf-16le + set bomenc utf-16le + set is_reliabletxt 1 + set startdata 2 + } elseif {$maybe_bom eq "0efeff"} { + set bomid scsu + set bomenc "binary" + set startdata 3 + } elseif {$maybe_bom eq "fbee28"} { + set bomid bocu-1 + puts stderr "WARNING - bocu-1 BOM FBEE28 found. Not supported - back to binary" + set bomenc "binary" ;# utf-8??? + set startdata 3 + } elseif {$maybe_bom eq "84319533"} { + if {![dict exists [punk::char::page_names_dict gb18030]]} { + puts stderr "WARNING - no direct support for GB18030 (chinese) - falling back to cp936/gbk" + set bomenc cp936 + } else { + set bomenc [dict get [punk::char::page_names_dict gb18030]] ;#review - this may never exist in Tcl or may be named differently - create a handler? + } + set bomid gb18030 + set startdata 4 + } elseif {$maybe_bom eq "f7644c"} { + puts stderr "WARNING utf-1 BOM F7644C found - not supported. Falling back to binary" + set bomid utf-1 + set bomenc binary + set startdata 3 + } elseif {[string match "2b2f76*" $maybe_bom]} { + puts stderr "WARNING utf-7 BOM 2b2f76 found - not supported. Falling back to binary and leaving BOM in data!" + #review - work out how to strip bom - last 2 bits of 4th byte belong to following character + set bomid utf-7 + set bomenc binary + set startdata 0 + } + + #todo - check xml encoding attribute / html content-type + #todo - a separate chardet (https://chardet.readthedocs.io/ ) or mozilla like mechanism that can be manually called to autodetect character encoding + #This should be an explicit operation - not automatially done here unless we provide a flag for it. + + + if {$opt_includebom} { + set startdata 0 + } + + if {$opt_encoding eq "\uFFFF"} { + if {$bomenc ne "" && $bomenc ne "binary"} { + if {[package vcompare [package provide Tcl] 8.7] < 0} { + #tcl 8.6 has unicode encoding but not utf-16le etc + if {$bomenc ni [encoding names]} { + if {$bomenc eq "utf-16le"} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set datachunk [encoding convertfrom unicode [string range $rawchunk $startdata end]] + set encoding_selected unicode + } else { + set datachunk [encoding convertfrom unicode [system::wordswap16 [string range $rawchunk $startdata end]]] + set encoding_selected "unicode (wordswapped 16)" + } + } elseif {$bomenc eq "utf-16be"} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set datachunk [encoding convertfrom unicode [system::wordswap16 [string range $rawchunk $startdata end]]] + set encoding_selected "unicode (wordswapped 16)" + } else { + set datachunk [encoding convertfrom unicode [string range $rawchunk $startdata end]] + set encoding_selected unicode + } + } elseif {$bomenc eq "utf-32le"} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set datachunk [encoding convertfrom unicode [string range $rawchunk $startdata end]] + set encoding_selected unicode + } else { + set datachunk [encoding convertfrom unicode [system::wordswap32 [string range $rawchunk $startdata end]]] + set encoding_selected "unicode (wordswapped 32)" + } + } elseif {$bomenc eq "utf-32be"} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set datachunk [encoding convertfrom unicode [system::wordswap32 [string range $rawchunk $startdata end]]] + set encoding_selected "unicode (wordswapped 32)" + } else { + set datachunk [encoding convertfrom unicode [string range $rawchunk $startdata end]] + set encoding_selected unicode + } + } else { + error "Encoding $bomenc unavailable in this version of Tcl" + } + } else { + set datachunk [encoding convertfrom $bomenc [string range $rawchunk $startdata end]] + set encoding_selected $bomenc + } + } else { + #tcl 8.7 plus has utf-16le etc + set datachunk [encoding convertfrom $bomenc [string range $rawchunk $startdata end]] + set encoding_selected $bomenc + } + } else { + #!? + if {$bomenc eq "binary"} { + set datachunk [string range $rawchunk $startdata end] + set encoding_selected binary + } else { + set datachunk [encoding convertfrom utf-8 [string range $rawchunk $startdata end]] + set encoding_selected utf-8 + } + } + } else { + #manually specified encoding overrides bom - but still remove bom-chars REVIEW + #e.g we still want bom info - but specify binary encoding + + if {$opt_encoding eq "binary"} { + set datachunk [string range $rawchunk $startdata end] + } else { + set datachunk [encoding convertfrom $opt_encoding [string range $rawchunk $startdata end]] + } + set encoding_selected $opt_encoding + } + + set textobj [class::textinfo new $datachunk] + if {$bomid ne ""} { + $textobj set_bomid $bomid + } + + + + + set summary "" + append summary "Bytes loaded : $rawlen" \n + append summary "BOM ID : $bomid" \n + append summary "Encoding selected : $encoding_selected" \n + append summary "Characters : [$textobj chunklen]" \n + append summary "Lines recognised : [$textobj linecount]" \n + set leinfo [$textobj chunk_le_counts 0 end] + append summary "crlf endings (windows) : [dict get $leinfo crlf]" \n + append summary "lf endings (unix) : [dict get $leinfo lf]" \n + append summary "unterminated lines : [dict get $leinfo unterminated]" \n + puts stdout $summary + return $textobj + } + + proc file_boundary_display {filename startbyte endbyte chunksize args} { + set fd [open $filename r] ;#use default error if file not readable + fconfigure $fd -translation binary + set rawfiledata [read $fd] + close $fd + set textobj [class::textinfo new $rawfiledata] + set result [$textobj chunk_boundary_display $startbyte $endbyte $chunksize {*}$args] + $textobj destroy + return $result + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::fileline ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::fileline::lib { + namespace export * + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace punk::fileline::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + + proc range_spans_chunk_boundaries {start end chunksize args} { + #*** !doctools + #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] + #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. + #[list_begin arguments] + # [arg_def integer start] + # [para] zero-based start index of range + # [arg_def integer end] + # [para] zero-based end index of range + # [arg_def integer chunksize] + # [para] Number of bytes/characters in chunk - must be positive and > 0 + #[list_end] + #[para]returns a dict with the keys is_span and boundaries + #[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize + #[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize + #[para]e.g + #[example_begin] + # range_spans_chunk_boundaries 10 1750 512 + # is_span 1 boundaries {512 1024 1536} + #[example_end] + #[para]The -offset option + #[example_begin] + # range_spans_chunk_boundaries 10 1750 512 -offset 2 + # is_span 1 boundaries {514 1026 1538} + #[example_end] + #[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 + if {[catch {package require Tcl 8.7-}]} { + #only one implementation available for older Tcl + tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args + } + if {$chunksize < 1} { + error "range_spans_chunk_boundaries chunksize must be >= 1" + } + + if {(abs($end - $start) / $chunksize) < 75} { + tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args + } else { + tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize {*}$args + } + } + + proc range_boundaries {start end chunksizes args} { + set argd [punk::args::get_dict { + -offset -default 0 + } $args] + lassign [dict values $argd] opts remainingargs + + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +namespace eval punk::fileline::system { + #*** !doctools + #[subsection {Namespace punk::fileline::system}] + #[para] Internal functions that are not part of the API + + proc wordswap16 {data} { + #scan in one endianness - format in the other. Whether we scan le/be first doesn't matter as long as we format using the opposite endianness + binary scan $data s* elements ;#scan little endian + return [binary format S* $elements] ;#format big endian + } + proc wordswap32 {data} { + binary scan $data i* elements + return [binary format I* $elements] + } + + proc scan32bit_be {i32} { + if {[binary scan $i32 I x]} { + return $x + } else { + error "couldn't scan $i32" + } + } + + #for 8.7+ using lseq + #much faster when resultant boundary size is large (at least when offset 0) + proc _range_spans_chunk_boundaries_lseq {start end chunksize args} { + if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly + set opts [dict create\ + -offset 0\ + ] + foreach {k v} $args { + switch -- $k { + -offset { + dict set opts $k $v + } + default { + error "unknown option '$k'. Known options: [dict keys $opts]" + } + } + } + # -- --- --- --- + set opt_offset [dict get $opts -offset] + # -- --- --- --- + + set smod [expr {$start % $chunksize}] + if {$smod != 0} { + set start [expr {$start + ($chunksize - $smod)}] + if {$start > $end} { + return [list is_span 0 boundaries {}] + } + } + set boundaries [lseq $start to $end $chunksize] + #offset can be negative + if {$opt_offset} { + if {$opt_offset + [lindex $boundaries end] > $end || $opt_offset + [lindex $boundaries 0] < $start} { + set overflow 1 + } else { + set overflow 0 + } + set boundaries [lmap v $boundaries[unset boundaries] {expr {$v + $opt_offset}}] + if {$overflow} { + #we don't know how many overflowed.. + set inrange [list] + foreach b $boundaries { + if {$b >= $start && $b <= $end} { + lappend inrange $b + } + } + set boundaries $inrange + } + } + return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries] + } + + #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case) + #gets very slow (comparitively) with large resultsets + proc _range_spans_chunk_boundaries_tcl {start end chunksize args} { + if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly + set opts [dict create\ + -offset 0\ + ] + foreach {k v} $args { + switch -- $k { + -offset { + dict set opts $k $v + } + default { + error "unknown option '$k'. Known options: [dict keys $opts]" + } + } + } + # -- --- --- --- + set opt_offset [dict get $opts -offset] + # -- --- --- --- + + set is_span 0 + set smod [expr {$start % $chunksize}] + if {$smod != 0} { + set start [expr {$start + ($chunksize - $smod)}] + } + set boundaries [list] + + #we only need to pre-check the result-range for negative offsets - as our main loop stops before end? + if {$opt_offset < 0} { + #set btrack [expr {$start + $opt_offset}] ;#start back one to make sure we catch the first boundary + set btrack $bstart + set boff [expr {$btrack + $opt_offset}] ;#must be growing even if start and offset are negative - as chunksize is at least 1 + while {$boff < $start} { + incr btrack $chunksize + set boff [expr {$btrack + $opt_offset}] + } + set bstart $btrack + } else { + set bstart $start + } + for {set b $bstart} {[set boff [expr {$b + $opt_offset}]] <= $end} {incr b $chunksize} { + lappend boundaries $boff + } + + return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries offset $opt_offset] + } + + proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} { + puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]" + puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]" + if {![catch {package require Tcl 8.7-}]} { + puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]" + } + } +} +namespace eval punk::fileline::ansi { + #*** !doctools + #[subsection {Namespace punk::fileline::ansi}] + #[para]These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable + #[para]See [package punk::ansi] for documentation + #[list_begin definitions] + variable enabled 1 + #*** !doctools + #[call [fun ansi::a]] + #[call [fun ansi::a+]] + #[call [fun ansi::ansistrip]] + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::fileline::ansi ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::fileline [namespace eval punk::fileline { + variable pkg punk::fileline + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.0.tm new file mode 100644 index 00000000..fea9534f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.0.tm @@ -0,0 +1,1472 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.0] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6 +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib::class { + #*** !doctools + #[subsection {Namespace punk::lib::class}] + #[para] class definitions + if {[info commands [namespace current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + namespace export * + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set defaults [dict create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [dict keys $defaults] + set fullopts [dict create] + dict for {k v} $argopts { + dict set fullopts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + set opts [dict merge $defaults $fullopts] + # -- --- --- --- + set opt_validate [dict get $opts -validate] + set opt_empty [dict get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map [list _ ""] [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map [list _ ""] $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map [list _ ""] $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [dict create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [dict keys $defaults] + set fullopts [dict create] + dict for {k v} $argopts { + dict set fullopts [tcl::prefix match -message "options for [namespace current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [dict merge $defaults $fullopts] + # -- --- --- --- + set opt_width [dict get $opts -width] + set opt_case [dict get $opts -case] + set opt_empty [dict get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + if {[string tolower $opt_case] eq "upper"} { + set spec X + } elseif {[string tolower $opt_case] eq "lower"} { + set spec x + } else { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map [list _ ""] [string trim $d]}] + if {![string is digit -strict [string map [list _ ""] $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map [list _ ""] $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 != 0} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2 != 0} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + proc lcm {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the lowest common multiple of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para] + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [dict create] + for {set i 2} {$i <= $n} {incr i} { + dict set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {dict unset nums $i} + lappend primes $next + dict for {next -} $nums break + } + return [concat $primes [dict keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [dict create] + for {set i 2} {$i <= $n} {incr i} { + dict set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {dict unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [dict keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [dict merge [dict merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [fconfigure stdin] + if {[catch { + package require punk::console + set console_raw [set ::punk::console::is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + fconfigure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + fconfigure stdin -blocking [dict get $stdin_state -blocking] + } + return $answer + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joines the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible + lassign [dict values [punk::lib::opts_values -minvalues 1 -maxvalues 1 { + -joinchar -default \n + } $args]] opts values + return [join [dict get $values 0] [dict get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [dict merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + set opts {-block {}} + } + set text [lindex $args end] + tailcall linelist {*}$opts $text + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [dict values [punk::lib::opts_values -anyopts 1 { + -block -default {} + } $args]] opts valuedict + tailcall linelist {*}$opts {*}[dict values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + proc linelist {args} { + #puts "---->linelist '$args'" + set usage "linelist ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map [list \r\n \n] $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set defaults [dict create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets 1\ + ] + dict for {o v} $arglist { + if {$o ni {-block -line -commandprefix -ansiresets}} { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + set opts [dict merge $defaults $arglist] + # -- --- --- --- --- --- + set opt_block [dict get $opts -block] + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + foreach bo $opt_block { + if {$bo ni $known_blockopts} { + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + #normalize certain combos + if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {[set posn [lsearch $opt_block trimtail1]] >=0 && "trimtail" in $opt_block} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + # -- --- --- --- --- --- + set opt_line [dict get $opts -line] + set known_lineopts [list trimline trimleft trimright] + foreach lo $opt_line { + if {$lo ni $known_lineopts} { + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + #normalize trimleft trimright combo + if {"trimleft" in $opt_line && "trimright" in $opt_line} { + set opt_line [list "trimline"] + } + # -- --- --- --- --- --- + set opt_commandprefix [dict get $opts -commandprefix] + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + foreach ln $nlsplit { + #already normalized trimleft+trimright to trimline + if {"trimline" in $opt_line} { + lappend linelist [string trim $ln] + } elseif {"trimleft" in $opt_line} { + lappend linelist [string trimleft $ln] + } elseif {"trimright" in $opt_line} { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + + #maintenance - take over from punk::args - or put back in punk::args once fixed to support pipeline argument order + #possible improvements - after the 1st call, replace the callsite in the calling proc with an inline script to process and validate the arguments as specified in optionspecs + #This would require a tcl parser .. and probably lots of other work + #It would also probably only be practical if there are no dynamic entries in the optionspecs. An option for opts_values to indicate the caller wants this optimisation would probably be best. + + #generally we expect values to contain leading dashes only if -- specified. Otherwise no reliable way determine difference between bad flags and values + #If no eopts (--) specified we stop looking for opts at the first nondash encountered in a position we'd expect a dash - so without eopt, values could contain dashes - but not in first position after flags. + #only supports -flag val pairs, not solo options + #If an option is supplied multiple times - only the last value is used. + proc opts_values {args} { + #*** !doctools + #[call [fun opts_values] [opt {option value...}] [arg optionspecs] [arg rawargs] ] + #[para]Parse rawargs as a sequence of zero or more option-value pairs followed by zero or more values + #[para]Returns a dict of the form: opts values + #[para]ARGUMENTS: + #[list_begin arguments] + #[arg_def multiline-string optionspecs] + #[para] This a block of text with records delimited by newlines (lf or crlf) - but with multiline values allowed if properly quoted/braced + #[para]'info complete' is used to determine if a record spans multiple lines due to multiline values + #[para]Each optionspec line must be of the form: + #[para]-optionname -key val -key2 val2... + #[para]where the valid keys for each option specification are: -default -type -range -choices -optional + #[arg_def list rawargs] + #[para] This is a list of the arguments to parse. Usually it will be the \$args value from the containing proc + #[list_end] + #[para] + + #consider line-processing example below for we need info complete to determine record boundaries + #punk::lib::opt_values { + # -opt1 -default {} + # -opt2 -default { + # etc + # } -multiple 1 + #} $args + + #-- cannot be used to allow opts_values itself to accept rawargs as separate values - so it doesn't serve much purpose other than as an indicator of intention + #For consistency we support it anyway. + #we have to be careful with end-of-options flag -- + #It may legitimately be the only value in the rawargs list (which is a bit odd - but possible) or it may occur immediately before optionspecs and rawargs + #if there is more than one entry in rawargs - we won't find it anyway - so that's ok + set eopts_posn [lsearch $args --] + if {$eopts_posn == ([llength $args]-1)} { + #sole argument in rawargs - not the one we're looking for + set eopts_posn -1 + } + if {$eopts_posn >= 0} { + set ov_opts [lrange $args 0 $eopts_posn-1] + set ov_vals [lrange $args $eopts_posn+1 end] + } else { + set ov_opts [lrange $args 0 end-2] + set ov_vals [lrange $args end-1 end] + } + if {[llength $ov_vals] < 2 || [llength $ov_opts] %2 != 0} { + error "opts_args wrong # args: should be opts_values ?opt val?... optionspecs rawargs_as_list + } + set optionspecs [lindex $ov_vals 0] + set optionspecs [string map [list \r\n \n] $optionspecs] + + set rawargs [lindex $ov_vals 1] + + set known_argspecs [list -default -type -range -choices -nocase -optional -multiple -validate_without_ansi -allow_ansi -strip_ansi -ARGTYPE] + set optspec_defaults [dict create\ + -optional 1\ + -allow_ansi 1\ + -validate_without_ansi 0\ + -strip_ansi 0\ + -nocase 0\ + ] + set required_opts [list] + set required_vals [list] + set arg_info [dict create] + set defaults_dict_opts [dict create] + set defaults_dict_values [dict create] + #first process dashed and non-dashed argspecs without regard to whether non-dashed are at the beginning or end + set value_names [list] + + set records [list] + set linebuild "" + foreach rawline [split $optionspecs \n] { + set recordsofar [string cat $linebuild $rawline] + if {![info complete $recordsofar]} { + append linebuild [string trimleft $rawline] \n + } else { + lappend records [string cat $linebuild $rawline] + set linebuild "" + } + } + + foreach ln $records { + set trimln [string trim $ln] + if {$trimln eq "" || [string index $trimln 0] eq "#"} { + continue + } + set argname [lindex $trimln 0] + set argspecs [lrange $trimln 1 end] + if {[llength $argspecs] %2 != 0} { + error "punk::lib::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'" + } + if {[string match -* $argname]} { + dict set argspecs -ARGTYPE option + set is_opt 1 + } else { + dict set argspecs -ARGTYPE value + lappend value_names $argname + set is_opt 0 + } + dict for {spec specval} $argspecs { + if {$spec ni $known_argspecs} { + error "punk::lib::opts_values - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs" + } + } + set argspecs [dict merge $optspec_defaults $argspecs] + dict set arg_info $argname $argspecs + if {![dict get $argspecs -optional]} { + if {$is_opt} { + lappend required_opts $argname + } else { + lappend required_vals $argname + } + } + if {[dict exists $arg_info $argname -default]} { + if {$is_opt} { + dict set defaults_dict_opts $argname [dict get $arg_info $argname -default] + } else { + dict set defaults_dict_values $argname [dict get $arg_info $argname -default] + } + } + } + + #puts "--> [info frame -2] <--" + set cmdinfo [dict get [info frame -2] cmd] + #we can't treat cmdinfo as a list - it may be something like {command {*}$args} in which case lindex $cmdinfo 0 won't work + #hopefully first word is a plain proc name if this function was called in the normal manner - directly from a proc + #we will break at first space and assume the lhs of that will give enough info to be reasonable - (alternatively we could use entire cmdinfo - but it might be big and ugly) + set caller [regexp -inline {\S+} $cmdinfo] + + #if called from commandline or some other contexts such as outside of a proc in a namespace - caller may just be "namespace" + if {$caller eq "namespace"} { + set caller "punk::lib::opts_values called from namespace" + } + + # ------------------------------ + if {$caller ne "punk::lib::opts_values"} { + #1) check our caller's call to us - recursive version - perhaps more elegant to eat our own dogfood - but maybe too much overhead for a script-based args processor which is already quite heavy :/ + #lassign [punk::lib::opts_values "-anyopts -default 0 -type integer\n -minvalues -default 0 -type integer\n -maxvalues -default -1 -type integer" $args] _o ownopts _v ownvalues + #if {[dict size $ownvalues] != 2} { + # error "punk::lib::opts_values expected: a multiline text block of option-specifications, a list of args and at most three option pairs -minvalues , -maxvalues , -anyopts true|false - got extra arguments: '$ownvalues'" + #} + #set opt_minvalues [dict get $ownopts -minvalues] + #set opt_maxvalues [dict get $ownopts -maxvalues] + #set opt_anyopts [dict get $ownopts -anyopts] + + #2) Quick and dirty - but we don't need much validation + set defaults [dict create\ + -minvalues 0\ + -maxvalues -1\ + -anyopts 0\ + ] + dict for {k v} $ov_opts { + if {$k ni {-minvalues -maxvalues -anyopts}} { + error "punk::lib::opts_values unrecognised option $k. Known values [dict keys $defaults]" + } + if {![string is integer -strict $v]} { + error "punk::lib::opts_values argument $k must be of type integer" + } + } + set ov_opts [dict merge $defaults $ov_opts] + set opt_minvalues [dict get $ov_opts -minvalues] + set opt_maxvalues [dict get $ov_opts -maxvalues] + set opt_anyopts [dict get $ov_opts -anyopts] + } else { + #don't recurse ie don't check our own args if we called ourself + set opt_minvalues 2 + set opt_maxvalues 2 + set opt_anyopts 0 + } + # ------------------------------ + + if {[set eopts [lsearch $rawargs "--"]] >= 0} { + set values [lrange $rawargs $eopts+1 end] + set arglist [lrange $rawargs 0 $eopts-1] + } else { + if {[lsearch $rawargs -*] >= 0} { + #to support option values with leading dash e.g -offset -1 , we can't just take the last flagindex + set i 0 + foreach {k v} $rawargs { + if {![string match -* $k]} { + break + } + if {$i+1 >= [llength $rawargs]} { + #no value for last flag + error "bad options for $caller. No value supplied for last option $k" + } + incr i 2 + } + set arglist [lrange $rawargs 0 $i-1] + set values [lrange $rawargs $i end] + } else { + set arglist [list] + set values $rawargs ;#no -flags detected + } + } + #confirm any valnames before last don't have -multiple key + foreach valname [lrange $value_names 0 end-1] { + if {[dict exists $arg_info $valname -multiple ]} { + error "bad key -multiple on argument spec for '$valname'. Only the last value argument specification can be marked -multiple" + } + } + set values_dict [dict create] + set validx 0 + set in_multiple "" + foreach valname $value_names val $values { + if {$validx+1 > [llength $values]} { + break + } + if {$valname ne ""} { + if {[dict exists $arg_info $valname -multiple] && [dict get $arg_info $valname -multiple]} { + dict lappend values_dict $valname $val + set in_multiple $valname + } else { + dict set values_dict $valname $val + } + } else { + if {$in_multiple ne ""} { + dict lappend values_dict $in_multiple $val + } else { + dict set values_dict $validx $val + } + } + incr validx + } + + if {$opt_maxvalues == -1} { + #only check min + if {[llength $values] < $opt_minvalues} { + error "bad number of trailing values for $caller. Got [llength $values] values. Expected at least $opt_minvalues" + } + } else { + if {[llength $values] < $opt_minvalues || [llength $values] > $opt_maxvalues} { + if {$opt_minvalues == $opt_maxvalues} { + error "bad number of trailing values for $caller. Got [llength $values] values. Expected exactly $opt_minvalues" + } else { + error "bad number of trailing values for $caller. Got [llength $values] values. Expected between $opt_minvalues and $opt_maxvalues inclusive" + } + } + } + #opts explicitly marked as -optional 0 must be present - regardless of -anyopts (which allows us to ignore additional opts to pass on to next call) + #however - if -anyopts is true, there is a risk we will treat a shortened option name as matching our default - when it was intended for the next call + #We will always require exact matches for all required opts to avoid this risk, even though an ultimately-called function may not require the full-length option-name REVIEW + #The aim is to allow a wrapper function to specify a default value for an option (and/or other changes/restrictions to a particular option or two) - without having to re-specify all the options for the underlying function. + #without full respecification in the wrapper - we can't know that a supplied prefix is unambiguous at the next level + #For this reason we need to pass on full-length opts for any defined options in the wrapper even if anyopts is true + set argnamespresent [dict keys $arglist] + foreach r $required_opts { + if {$r ni $argspresent} { + error "Required option missing for $caller. '$r' is marked with -optional false - so must be present in its full-length form" + } + } + set valuenamespresent [dict keys $values_dict] + foreach r $required_vals { + if {$r ni $valuenamespresent} { + error "Required value missing for $caller. '$r' is marked with -optional false - so must be present" + } + } + if {!$opt_anyopts} { + set checked_args [dict create] + for {set i 0} {$i < [llength $arglist]} {incr i} { + #allow this to error out with message indicating expected flags + set val [lindex $arglist $i+1] + set fullopt [tcl::prefix match -message "options for $caller. Unexpected option" [dict keys $arg_info] [lindex $arglist $i]] + if {[dict exists $arg_info $fullopt -multiple] && [dict get $arg_info $fullopt -multiple]} { + dict lappend checked_args $fullopt $val + } else { + dict set checked_args $fullopt $val + } + incr i ;#skip val + } + } else { + #still need to use tcl::prefix match to normalize - but don't raise an error + set checked_args [dict create] + dict for {k v} $arglist { + if {![catch {tcl::prefix::match [dict keys $arg_info] $k} fullopt]} { + if {[dict exists $arg_info $fullopt -multiple] && [dict get $arg_info $fullopt -multiple]} { + dict lappend checked_args $fullopt $v + } else { + dict set checked_args $fullopt $v + } + } else { + #opt was unspecified + dict set checked_args $k $v + } + } + } + set opts [dict merge $defaults_dict_opts $checked_args] + #assert - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options + + set values [dict merge $defaults_dict_values $values_dict] + + #todo - allow defaults outside of choices/ranges + + #check types,ranges,choices + set opts_and_values [concat $opts $values] + set combined_defaults [concat $defaults_dict_values $defaults_dict_opts] ;#can be no key collisions - we don't allow a value key beginning with dash - opt names must begin with dash + dict for {o v} $opts_and_values { + if {[dict exists $arg_info $o -multiple] && [dict get $arg_info $o -multiple]} { + set vlist $v + } else { + set vlist [list $v] + } + + if {[dict exists $arg_info $o -validate_without_ansi] && [dict get $arg_info $o -validate_without_ansi]} { + set validate_without_ansi 1 + package require punk::ansi + } else { + set validate_without_ansi 0 + } + if {[dict exists $arg_info $o -allow_ansi] && [dict get $arg_info $o -allow_ansi]} { + set allow_ansi 1 + } else { + #ironically - we need punk::ansi to detect and disallow - but we don't need it if ansi is allowed + package require punk::ansi + set allow_ansi 0 + } + if {!$allow_ansi} { + foreach e $vlist { + if {[punk::ansi::ta::detect $e]} { + error "Option $o for $caller contains ansi - but -allow_ansi is false. Received: '$e'" + } + } + } + + set vlist_check [list] + foreach e $vlist { + if {$validate_without_ansi} { + lappend vlist_check [punk::ansi::stripansi $e] + } else { + lappend vlist_check $e + } + } + + set is_default 0 + foreach e $vlist e_check $vlist_check { + if {[dict exists $combined_defaults $o] && ($e_check eq [dict get $combined_defaults $o])} { + incr is_default + } + } + if {$is_default eq [llength $vlist]} { + set is_default true + } + #we want defaults to pass through - even if they don't pass the checks that would be required for a specified value + #If the caller manually specified a value that happens to match the default - we don't detect that as any different from an unspecified value - Review. + if {!$is_default} { + if {[dict exists $arg_info $o -type]} { + set type [dict get $arg_info $o -type] + if {[string tolower $type] in {int integer double}} { + if {[string tolower $type] in {int integer}} { + foreach e $vlist e_check $vlist_check { + if {![string is integer -strict $e_check]} { + error "Option $o for $caller requires type 'integer'. Received: '$e'" + } + } + } elseif {[string tolower $type] in {double}} { + foreach e $vlist e_check $vlist_check { + if {![string is double -strict $e_check]} { + error "Option $o for $caller requires type 'double'. Received: '$e'" + } + } + } + + #todo - small-value double comparisons with error-margin? review + if {[dict exists $arg_info $o -range]} { + lassign [dict get $arg_info $o -range] low high + foreach e $vlist e_check $vlist_check { + if {$e_check < $low || $e_check > $high} { + error "Option $o for $caller must be between $low and $high. Received: '$e'" + } + } + } + } elseif {[string tolower $type] in {bool boolean}} { + foreach e $vlist e_check $vlist_check { + if {![string is boolean -strict $e_check]} { + error "Option $o for $caller requires type 'boolean'. Received: '$e'" + } + } + } elseif {[string tolower $type] in {alnum alpha ascii control digit graph lower print punct space upper wordchar xdigit}} { + foreach e $vlist e_check $vlist_check { + if {![string is [string tolower $type] $e_check]} { + error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e'" + } + } + } elseif {[string tolower $type] in {file directory existingfile existingdirectory}} { + foreach e $vlist e_check $vlist_check { + if {!([string length $e_check]>0 && ![regexp {[\"*?<>\;]} $e_check])} { + error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e' which doesn't look like it could be a file or directory" + } + } + if {[string tolower $type] in {existingfile}} { + foreach e $vlist e_check $vlist_check { + if {![file exists $e_check]} { + error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e' which is not an existing file" + } + } + } elseif {[string tolower $type] in {existingdirectory}} { + foreach e $vlist e_check $vlist_check { + if {![file isdirectory $e_check]} { + error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e' which is not an existing directory" + } + } + } + } elseif {[string tolower $type] in {char character}} { + foreach e $vlist e_check $vlist_check { + if {[string length != 1]} { + error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e' which is not a single character" + } + } + } + } + if {[dict exists $arg_info $o -choices]} { + set choices [dict get $arg_info $o -choices] + set nocase [dict get $arg_info $o -nocase] + foreach e $vlist e_check $vlist_check { + if {$nocase} { + set casemsg "(case insensitive)" + set choices_test [string tolower $choices] + set v_test [string tolower $e_check] + } else { + set casemsg "(case sensitive)" + set v_test $e_check + set choices_test $choices + } + if {$v_test ni $choices_test} { + error "Option $o for $caller must be one of the listed values $choices $casemsg. Received: '$e'" + } + } + } + } + if {[dict exists $arg_info $o -strip_ansi] && [dict get $arg_info $o -strip_ansi]} { + set stripped_list [list] + foreach e $vlist { + lappend stripped_list [punk::ansi::stripansi $e] + } + if {[dict exists $arg_info $o -multiple] && [dict get $arg_info $o -multiple]} { + if {[dict get $arg_info $o -ARGTYPE] eq "option"} { + dict set opts $o $stripped_list + } else { + dict set values $o $stripped_list + } + } else { + if {[dict get $arg_info $o -ARGTYPE] eq "option"} { + dict set opts $o [lindex $stripped_list 0] + } else { + dict set values [lindex $stripped_list 0] + } + } + } + } + + #maintain order of opts $opts values $values as caller may use lassign. + return [dict create opts $opts values $values] + } + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +namespace eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + if {$c eq {"}} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } elseif {$c eq "\["} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } elseif {$c eq "\{"} { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [namespace eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm new file mode 100644 index 00000000..3a5764b5 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.1.tm @@ -0,0 +1,3073 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.1 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.1] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::lib::class { +# #*** !doctools +# #[subsection {Namespace punk::lib::class}] +# #[para] class definitions +# if {[info commands [tcl::namespace::current]::interface_sample1] eq ""} { +# #*** !doctools +# #[list_begin enumerated] +# +# # oo::class create interface_sample1 { +# # #*** !doctools +# # #[enum] CLASS [class interface_sample1] +# # #[list_begin definitions] +# +# # method test {arg1} { +# # #*** !doctools +# # #[call class::interface_sample1 [method test] [arg arg1]] +# # #[para] test method +# # puts "test: $arg1" +# # } +# +# # #*** !doctools +# # #[list_end] [comment {-- end definitions interface_sample1}] +# # } +# +# #*** !doctools +# #[list_end] [comment {--- end class enumeration ---}] +# } +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::lib::ensemble { + #wiki.tcl-lang.org/page/ensemble+extend + # extend an ensemble-like routine with the routines in some namespace + proc extend {routine extension} { + if {![string match ::* $routine]} { + set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] + if {$resolved eq {}} { + error [list {no such routine} $routine] + } + set routine $resolved + } + set routinens [tcl::namespace::qualifiers $routine] + if {$routinens eq {::}} { + set routinens {} + } + set routinetail [tcl::namespace::tail $routine] + + if {![string match ::* $extension]} { + set extension [uplevel 1 [ + list [tcl::namespace::which namespace] current]]::$extension + } + + if {![tcl::namespace::exists $extension]} { + error [list {no such namespace} $extension] + } + + set extension [tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] current]] + + tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] export *] + + while 1 { + set renamed ${routinens}::${routinetail}_[info cmdcount] + if {[tcl::namespace::which $renamed] eq {}} break + } + + rename $routine $renamed + + tcl::namespace::eval $extension [ + list namespace ensemble create -command $routine -unknown [ + list apply {{renamed ensemble routine args} { + list $renamed $routine + }} $renamed + ] + ] + + return $routine + } +} + +tcl::namespace::eval punk::lib::compat { + #*** !doctools + #[subsection {Namespace punk::lib::compat}] + #[para] compatibility functions for features that may not be available in earlier Tcl versions + #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. + #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. + + #*** !doctools + #[list_begin definitions] + + if {"::lremove" ne [info commands ::lremove]} { + #puts stderr "Warning - no built-in lremove" + interp alias {} lremove {} ::punk::lib::compat::lremove + } + proc lremove {list args} { + #*** !doctools + #[call [fun lremove] [arg list] [opt {index ...}]] + #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove + + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lsearch -all -inline -index 1 -subindices $keep *] + } + #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers + proc lremove2 {list args} { + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lmap v $keep {lindex $v 1}] + } + #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. + #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 - and even in 8.7 it doesn't seem to allow returning one index of the stridden 'group' + + if {"::lpop" ne [info commands ::lpop]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lpop + } + proc lpop {lvar args} { + #*** !doctools + #[call [fun lpop] [arg listvar] [opt {index}]] + #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop + upvar $lvar l + if {![llength $args]} { + set args [list end] + } + set v [lindex $l {*}$args] + set newlist $l + + set path [list] + set subl $l + for {set i 0} {$i < [llength $args]} {incr i} { + set idx [lindex $args $i] + if {![llength [lrange $subl $idx $idx]]} { + error "tcl_lpop index \"$idx\" out of range" + } + lappend path [lindex $args $i] + set subl [lindex $l {*}$path] + } + + set sublist_path [lrange $args 0 end-1] + set tailidx [lindex $args end] + if {![llength $sublist_path]} { + #set newlist [lremove $newlist $tailidx] + set newlist [lreplace $newlist $tailidx $tailidx] + } else { + set sublist [lindex $newlist {*}$sublist_path] + #set sublist [lremove $sublist $tailidx] + set sublist [lreplace $sublist $tailidx $tailidx] + lset newlist {*}$sublist_path $sublist + } + #puts "[set l] -> $newlist" + set l $newlist + return $v + } + + + #slight isolation - varnames don't leak - but calling context vars can be affected + proc lmaptcl2 {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result [apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + if {"::lmap" ne [info commands ::lmap]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lmaptcl + } + #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway + proc lmaptcl {varnames list script} { + set result [list] + set varlist [list] + foreach varname $varnames { + upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc + lappend varlist var_$varname + } + foreach $varlist $list { + lappend result [uplevel 1 $script] + } + return $result + } + + #tcl8.7/9 compatibility for 8.6 + if {[info commands ::tcl::string::insert] eq ""} { + #https://wiki.tcl-lang.org/page/string+insert + # Pure Tcl implementation of [string insert] command. + proc ::tcl::string::insert {string index insertString} { + # Convert end-relative and TIP 176 indexes to simple integers. + if {[regexp -expanded { + ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace + |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace + (?:([+-]) # op, omitted when index is "end" + ([+-]?\d+))? # n, omitted when index is "end" + [\t\n\v\f\r ]*$ # optional whitespace (unless "end") + } $index _ m op n]} { + # Convert first index to an integer. + switch $m { + end {set index [string length $string]} + default {scan $m %d index} + } + + # Add or subtract second index, if provided. + switch $op { + + {set index [expr {$index + $n}]} + - {set index [expr {$index - $n}]} + } + } elseif {![string is integer -strict $index]} { + # Reject invalid indexes. + return -code error "bad index \"$index\": must be\ + integer?\[+-\]integer? or end?\[+-\]integer?" + } + + # Concatenate the pre-insert, insertion, and post-insert strings. + string cat [string range $string 0 [expr {$index - 1}]] $insertString\ + [string range $string $index end] + } + + # Bind [string insert] to [::tcl::string::insert]. + tcl::namespace::ensemble configure string -map [tcl::dict::replace\ + [tcl::namespace::ensemble configure string -map]\ + insert ::tcl::string::insert] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + tcl::namespace::export * + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + if {[info commands lseq] ne ""} { + #tcl 8.7+ lseq significantly faster, especially for larger ranges + #support minimal set from to + proc range {from to} { + lseq $from $to + } + } else { + #lseq accepts basic expressions e.g 4-2 for both arguments + #e.g we can do lseq 0 [llength $list]-1 + #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. + proc range {from to} { + set to [offset_expr $to] + set from [offset_expr $from] + if {$to > $from} { + set count [expr {($to -$from) + 1}] + if {$from == 0} { + return [lsearch -all [lrepeat $count 0] *] + } else { + incr from -1 + return [lmap v [lrepeat $count 0] {incr from}] + } + #slower methods. + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from] + #} + #return $L + } elseif {$from > $to} { + set count [expr {$from - $to} + 1] + #1) + if {$to == 0} { + return [lreverse [lsearch -all [lrepeat $count 0] *]] + } else { + incr from + return [lmap v [lrepeat $count 0] {incr from -1}] + } + + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from -1];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from -1] + #} + #return $L + } else { + return [list $from] + } + } + } + + #experiment with equiv of js template literals with ${expression} in templates + #e.g tstr {This is the value of x in calling scope ${$x} !} + #e.g tstr -allowcommands {This is the value of x in calling scope ${[set x]} !} + #e.g tstr -allowcommands {This is the value of [lindex $x 0] in calling scope ${[lindex [set x] 0]} !} + proc tstr {args} { + set argd [punk::args::get_dict { + *proc -name punk::lib::tstr -help "A rough equivalent of js template literals" + -allowcommands -default 0 -type none -help "if -allowcommands is true placeholder can contain commands e.g {${plaintext1 [lindex $var 0] plaintext2}}" + -return -default list -choices {dict list string} + *values -min 1 -max 1 + templatestring -help "This argument should be a braced string containing placeholders such as ${$var} e.g {The value is ${$var}} + where $var will be substituted from the calling context + The placeholder itself can contain plaintext portions as well as variables. + It can contain commands in square brackets if -allowcommands is true" + } $args] + set templatestring [dict get $argd values templatestring] + set opt_allowcommands [dict get $argd opts -allowcommands] + set opt_return [dict get $argd opts -return] + set nocommands "-nocommands" + if {$opt_allowcommands == 1} { + set nocommands "" + } + + #set parts [_tstr_split $templatestring] + set parts [_parse_tstr_parts $templatestring] + set textchunks [list] + #set expressions [list] + set params [list] + set idx 0 + foreach {pt expression} $parts { + lappend textchunks $pt + incr idx ;#pt incr + + #ignore last expression + if {$idx == [llength $parts]} { + break + } + #lappend expressions $expression + lappend params [uplevel 1 [list subst {*}$nocommands $expression]] + + incr idx ;#expression incr + } + switch -- $opt_return { + dict { + return [dict create template $textchunks params $params] + } + list { + return [list $textchunks {*}$params] + } + string { + set out "" + foreach pt $textchunks param $params { + append out $pt $param + } + return $out + } + default { + } + } + } + #test single placeholder tstr args where single placeholder must be an int + proc tstr_test_one {args} { + set argd [punk::args::get_dict { + *proc -name tstr_test_one -help {An example/test of a function designed to be called with a js-style curly-braced Tstr. + example: + set id 2 + tstr_test_one {*}[Tstr {Select * from table where id = ${$id} and etc... ;}] + } + + *values -min 2 -max 2 + template -type list -minlen 2 -maxlen 2 -help "This could be supplied directly as a 2 element list of each half of the sql statement - + but the Tstr method above does this for you, and also passes in the id automatically" + + where -type int -help {Integer param for where clause. Tstr mechanism above will pass the id as the second parameter} + } $args] + set template [dict get $argd values template] + set where [dict get $argd values where] + set result [join [list [lindex $template 0] $where [lindex $template 1]] ""] + return $result + } + proc _parse_tstr_parts {templatestring} { + if {$templatestring eq ""} { + return [list] + } + set chars [split $templatestring ""] + set in_placeholder 0 + set tchars "" + set echars "" + set parts [list] + set i 0 + foreach ch $chars { + if {!$in_placeholder} { + set nextch [lindex $chars [expr {$i+1}]] + if {"$ch$nextch" eq "\$\{"} { + set in_placeholder 2 ;#2 to signify we just entered placeholder + lappend parts $tchars + set tchars "" + } else { + append tchars $ch + } + } else { + if {$ch eq "\}"} { + if {[tcl::info::complete $echars]} { + set in_placeholder 0 + lappend parts $echars + set echars "" + } else { + append echars $ch + } + } else { + if {$in_placeholder == 2} { + #skip opening bracket + set in_placeholder 1 + } else { + append echars $ch + } + } + } + incr i + } + if {$tchars ne ""} { + lappend parts $tchars + } + if {[llength $parts] % 2 == 0} { + #always trail with pt for consistency with _perlish_split method so we can test other mechanisms with odd-length pt/code../pt style list + lappend parts "" + } + return $parts + } + #based on punk::ansi::ta::_perlish_split + proc _tstr_split {text} { + if {$text eq ""} { + return {} + } + set list [list] + set start 0 + #ideally re should allow curlies within but we will probably need a custom parser to do it + #(js allows nested string interpolation) + #set re {\$\{[^\}]*\}} + set re {\$\{(?:(?!\$\{).)*\}} + + #eg regexp {\x1b(?:\(0(?:(?:(?!\x1b\(B).)*\x1b\(B)|\)0(?:(?:(?!\x1b\)B).)*\x1b\)B))} $code + + #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW + while {[regexp -start $start -indices -- $re $text match]} { + lassign $match matchStart matchEnd + #puts "->start $start ->match $matchStart $matchEnd" + if {$matchEnd < $matchStart} { + puts "e:$matchEnd < s:$matchStart" + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::index $text $matchStart] + incr start + if {$start >= [tcl::string::length $text]} { + break + } + continue + } + lappend list [tcl::string::range $text $start $matchStart-1] [tcl::string::range $text $matchStart+2 $matchEnd-1] + set start [expr {$matchEnd+1}] + #? + if {$start >= [tcl::string::length $text]} { + break + } + } + return [lappend list [tcl::string::range $text $start end]] + } + + #get info about punk nestindex key ie type: list,dict,undetermined + proc nestindex_info {args} { + set argd [punk::args::get_dict { + -parent -default "" + nestindex + } $args] + set opt_parent [dict get $argd opts -parent] + if {$opt_parent eq ""} { + set parent_type undetermined + } else { + set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing + } + + + } + + + proc pdict {args} { + if {[catch {package require punk::ansi} errM]} { + set sep " = " + } else { + #set sep " [a+ Web-seagreen]=[a] " + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " + } + set argspec [string map [list %sep% $sep] { + *proc -name pdict -help {Print dict keys,values to channel + (see also showdict)} + + *opts -any 1 + + #default separator to provide similarity to tcl's parray function + -separator -default "%sep%" + -roottype -default "dict" + -substructure -default {} + -channel -default stdout -help "existing channel - or 'none' to return as string" + + *values -min 1 -max -1 + + dictvar -type string -help "name of variable. Can be a dict, list or array" + + patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. + Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) + The system uses similar patterns to the punk pipeline pattern-matching system. + The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. + Segments are classified into list,dict and string operations. + Leading % indicates a string operation - e.g %# gives string length + A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 + A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' + The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. + e.g1 pdict env */%# + the pattern starts with default type dict, so * retrieves all keys & values, + the next hierarchy switches to a string operation to get the length of each value. + e.g2 pdict env W* S* + Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns + e.g3 pdict punk_testd */* + This displays 2 levels of the dict hierarchy. + Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) + - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. + e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 + Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent + The second level segement in each pattern switches to a dict operation to retrieve the value by key. + When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. + + The pdict function operates on variable names - passing the value to the showdict function which operates on values + } + }] + #puts stderr "$argspec" + set argd [punk::args::get_dict $argspec $args] + + set opts [dict get $argd opts] + set dvar [dict get $argd values dictvar] + set patterns [dict get $argd values patterns] + set isarray [uplevel 1 [list array exists $dvar]] + if {$isarray} { + set dvalue [uplevel 1 [list array get $dvar]] + if {![dict exists $opts -keytemplates]} { + set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] + dict set opts -keytemplates [list $arrdisplay] + } + dict set opts -keysorttype dictionary + } else { + set dvalue [uplevel 1 [list set $dvar]] + } + showdict {*}$opts $dvalue {*}$patterns + } + + #TODO - much. + #showdict needs to be able to show different branches which share a root path + #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) + # - specify ansi colour per pattern so different branches can be highlighted? + # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc + # - The current version is incomplete but passably usable. + # - Copy proc and attempt rework so we can get back to this as a baseline for functionality + proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) + #set sep " [a+ Web-seagreen]=[a] " + if {[catch {package require punk::ansi} errM]} { + set sep " = " + set RST "" + set sep_mismatch " mismatch " + } else { + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " ;#stick to basic default colours for wider terminal support + set RST [punk::ansi::a] + set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch[punk::ansi::a] " + } + package require punk ;#we need pipeline pattern matching features + package require textblock + + set argd [punk::args::get_dict [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { + *id punk::lib::showdict + *proc -name punk::lib::showdict -help "display dictionary keys and values" + #todo - table tableobject + -return -default "tailtohead" -choices {tailtohead sidebyside} + -channel -default none + -trimright -default 1 -type boolean -help "Trim whitespace off rhs of each line. + This can help prevent a single long line that wraps in terminal from making every line wrap due to long rhs padding + " + -separator -default {%sep%} -help "Separator column between keys and values" + -separator_mismatch -default {%sep_mismatch%} -help "Separator to use when patterns mismatch" + -roottype -default "dict" -help "list,dict,string" + -ansibase_keys -default "" -help "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" + -substructure -default {} + -ansibase_values -default "" + -keytemplates -default {${$key}} -type list -help "list of templates for keys at each level" + -keysorttype -default "none" -choices {none dictionary ascii integer real} + -keysortdirection -default increasing -choices {increasing decreasing} + *values -min 1 -max -1 + dictvalue -type list -help "dict or list value" + patterns -default "*" -type string -multiple 1 -help "key or key glob pattern" + }] $args] + set opt_sep [dict get $argd opts -separator] + set opt_mismatch_sep [dict get $argd opts -separator_mismatch] + set opt_keysorttype [dict get $argd opts -keysorttype] + set opt_keysortdirection [dict get $argd opts -keysortdirection] + set opt_trimright [dict get $argd opts -trimright] + set opt_keytemplates [dict get $argd opts -keytemplates] + set opt_ansibase_keys [dict get $argd opts -ansibase_keys] + set opt_ansibase_values [dict get $argd opts -ansibase_values] + set opt_return [dict get $argd opts -return] + set opt_roottype [dict get $argd opts -roottype] + set opt_structure [dict get $argd opts -substructure] + + set dval [dict get $argd values dictvalue] + set patterns [dict get $argd values patterns] + + set result "" + + #pattern hierarchy + # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest + # * @1 @0,%#,%str - segments + # a b 1 0 %# %str - keys + + set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated + set pattern_next_substructure [dict create] + set pattern_this_structure [dict create] + + # -- --- --- --- + #REVIEW + #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. + #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). + #todo - determine if there is a more consistent rule-based way to do this rather than adhoc + #e.g pdict something * + #we want the keys from the result as individual lines on lhs + #e.g pdict something @@ + #we want on lhs result on rhs + # = v0 + #e.g pdict something @0-2,@4 + #we currently return: + #0 = v0 + #1 = v1 + #2 = v2 + #4 = v4 + #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) + #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. + #this is a tradeoff that could create surprises and make things messy and/or inconsistent. + #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. + #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys + #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment + #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) + # -- --- --- --- + + set filtered_keys [list] + if {$opt_roottype in {dict list string}} { + #puts "getting keys for roottype:$opt_roottype" + if {[llength $dval]} { + set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} + set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + foreach pattern_nest $patterns { + set keyset [list] + set keyset_structure [list] + + set segments [split $pattern_nest /] + set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns + #we need to use _split_patterns to separate (e.g to protext commas that appear within quotes) + set patterninfo [punk::_split_patterns $levelpatterns] + #puts stderr "showdict-->_split_patterns: $patterninfo" + foreach v_idx $patterninfo { + lassign $v_idx v idx + #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) + set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern + switch -exact -- $p { + * - "" { + if {$opt_roottype eq "list"} { + set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + dict set pattern_this_structure $p list + } elseif {$opt_roottype eq "dict"} { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } else { + lappend keyset %string + lappend keyset_structure string + dict set pattern_this_structure $p string + } + } + %# { + dict set pattern_this_structure $p string + lappend keyset %# + lappend keyset_structure string + } + # { + dict set pattern_this_structure $p list + lappend keyset # + lappend keyset_structure list + } + ## { + dict set pattern_this_structure $p dict + lappend keyset [list ## query] + lappend keyset_structure dict + } + @* { + puts ---->HERE<---- + dict set pattern_this_structure $p list + set keys [punk::lib::range 0 [llength $dval]-1] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } + @@ { + #get first k v from dict + dict set pattern_this_structure $p dict + lappend keyset [list @@ query] + lappend keyset_structure dict + } + @*k@* - @*K@* { + #returns keys only + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @*.@* { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + default { + #puts stderr "===p:$p" + #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! + #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful + #@@"key,etc" should allow any non-whitespace key + switch -glob -- $p { + {@k\*@*} - {@K\*@*} { + #value glob return keys + #set search [string range $p 4 end] + #dict for {k v} $dval { + # if {[string match $search $v]} { + # lappend keyset $k + # } + #} + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @@* { + #exact match key - review - should raise error to match punk pipe behaviour? + set k [string range $p 2 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + @k@* - @K@* { + set k [string range $p 3 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + {@\*@*} { + #return list of values + #set k [string range $p 3 end] + #lappend keyset {*}[dict keys $dval $k] + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*.@*} { + set k [string range $p 4 end] + set keys [dict keys $dval $k] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + {@v\*@*} - {@V\*@*} { + #value-glob return value + #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*v@*} - {@\*V@*} { + #key-glob return value + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*@*} - {@\*v@*} - {@\*V@} { + #key glob return val + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @??@* { + #exact key match - no error + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + default { + set this_type $opt_roottype + if {[string match @* $p]} { + #list mode - trim optional list specifier @ + set p [string range $p 1 end] + dict set pattern_this_structure $p list + set this_type list + } elseif {[string match %* $p]} { + dict set pattern_this_structure $p string + lappend keyset $p + lappend keyset_structure string + set this_type string + } + if {$this_type eq "list"} { + dict set pattern_this_structure $p list + if {[string is integer -strict $p]} { + lappend keyset $p + lappend keyset_structure list + } elseif {[string match "?*-?*" $p]} { + #could be either - don't change type + #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers + #now we should map _ to "" first + set p [string map {_ {}} $p] + #lassign [textutil::split::splitx $p {\.\.}] a b + if {![regexp $re_idxdashidx $p _match a b]} { + error "unrecognised pattern $p" + } + set lower_resolve [punk::lib::lindex_resolve $dval $a] ;#-2 for too low, -1 for too high + #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds + if {${lower_resolve} == -1} { + #lower bound is above upper list range + #match with decreasing indices is still possible + set lower [expr {[llength $dval]-1}] ;#set to max + } elseif {$lower_resolve == -2} { + set lower 0 + } else { + set lower $lower_resolve + } + set upper [punk::lib::lindex_resolve $dval $b] + if {$upper == -2} { + #upper bound is below list range - + if {$lower_resolve >=-1} { + set upper 0 + } else { + continue + } + } elseif {$upper == -1} { + #use max + set upper [expr {[llength $dval]-1}] + #assert - upper >=0 because we have ruled out empty lists + } + #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order + set keys [punk::lib::range $lower $upper] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + lappend keyset [list @$p query] + lappend keyset_structure list + } + } elseif {$this_type eq "string"} { + dict set pattern_this_structure $p string + } elseif {$this_type eq "dict"} { + #default equivalent to @\*@* + dict set pattern_this_structure $p dict + #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" + set keys [dict keys $dval $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + puts stderr "list: unrecognised pattern $p" + } + } + } + } + } + } + + # -- --- --- --- + #check next pattern-segment for substructure type to use + # -- --- --- --- + set substructure "" + set pnext [lindex $segments 1] + set patterninfo [punk::_split_patterns $levelpatterns] + if {[llength $patterninfo] == 0} { + # // ? -review - what does this mean? for xpath this would mean at any level + set substructure [lindex $pattern_this_structure end] + } elseif {[llength $patterninfo] == 1} { + # single type in segment e.g /@@something/ + switch -exact $pnext { + "" { + set substructure string + } + @*k@* - @*K@* - @*.@* - ## { + set substructure dict + } + # { + set substructure list + } + ## { + set substructure dict + } + %# { + set substructure string + } + * { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + default { + switch -glob -- $pnext { + @??@* - @?@* - @@* { + #all 4 or 3 len prefixes bounded by @ are dict + set substructure dict + } + default { + if {[string match @* $pnext]} { + set substructure list + } elseif {[string match %* $pnext]} { + set substructure string + } else { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + } + } + } + } + } else { + #e.g /@0,%str,.../ + #doesn't matter what the individual types are - we have a list result + set substructure list + } + #puts "--pattern_nest: $pattern_nest substructure: $substructure" + dict set pattern_next_substructure $pattern_nest $substructure + # -- --- --- --- + + if {$opt_keysorttype ne "none"} { + set int_keyset 1 + foreach k $keyset { + if {![string is integer -strict $k]} { + set int_keyset 0 + break + } + } + if {$int_keyset} { + set sortindices [lsort -indices -integer $keyset] + #set keyset [lsort -integer $keyset] + } else { + #set keyset [lsort -$opt_keysorttype $keyset] + set sortindices [lsort -indices -$opt_keysorttype $keyset] + } + set keyset [lmap i $sortindices {lindex $keyset $i}] + set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] + } + + foreach k $keyset { + lappend pattern_key_index $pattern_nest + } + + lappend filtered_keys {*}$keyset + lappend all_keyset_structure {*}$keyset_structure + + #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" + } + } + #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" + } else { + puts stdout "unrecognised roottype: $opt_roottype" + return $dval + } + + if {[llength $filtered_keys]} { + #both keys and values could have newline characters. + #simple use of 'format' won't cut it for more complex dict keys/values + #use block::width or our columns won't align in some cases + switch -- $opt_return { + "tailtohead" { + #last line of key is side by side (possibly with separator) with first line of value + #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values + #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries + + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt {${$key}} + } + #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] + set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] + set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] + + set kidx 0 + set last_hidekey 0 + foreach keydisplay $display_keys key $filtered_keys { + set thisval "?" + set hidekey 0 + set pattern_nest [lindex $pattern_key_index $kidx] + set pattern_nest_list [split $pattern_nest /] + #set this_type [dict get $pattern_this_structure $pattern_nest] + #set this_type [dict get $pattern_this_structure $key] + set this_type [lindex $all_keyset_structure $kidx] + #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" + + set is_match 1 ;#whether to display the normal separator or bad-match separator + switch -- $this_type { + dict { + #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict + # - default highlight dupes (ansi underline?) + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + % thisval.= $qry= $dval + } else { + set thisval [tcl::dict::get $dval $key] + } + + #set substructure [lrange $opt_structure 1 end] + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + + set subansibasekeys [lrange $opt_ansibase_keys 1 end] + set nextkeytemplates [lrange $opt_keytemplates 1 end] + #dict set nextopts -substructure $nextsub + dict set nextopts -keytemplates $nextkeytemplates + dict set nextopts -ansibase_keys $subansibasekeys + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" + + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" + set is_match 0 + } + } + } + list { + if {[string is integer -strict $key]} { + set thisval [lindex $dval $key] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + % thisval.= $qry= $dval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + #if {![llength $nextpatterns]} { + # set nextpatterns * + #} + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + set is_match 0 + } + } + } + string { + set hidekey 1 + if {$key eq "%string"} { + set hidekey 1 + set thisval $dval + } elseif {$key eq "%ansiview"} { + set thisval [ansistring VIEW -lf 1 $dval] + } elseif {$key eq "%ansiviewstyle"} { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] + } elseif {[string match *lpad-* $key]} { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } elseif {[string match *lpadstr-* $key]} { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } elseif {[string match *rpad-* $key]} { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } elseif {[string match *rpadstr-* $key]} { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + % thisval.= $key= $thisval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } + + } + } + if {$this_type eq "string" && $hidekey} { + lassign [textblock::size $thisval] _vw vwidth _vh vheight + #set blanks_above [string repeat \n [expr {$kheight -1}]] + set vblock $opt_ansibase_values$thisval$RST + #append result [textblock::join_basic -- $vblock] + #review - we wouldn't need this space if we had a literal %sp %sp-x ?? + append result " $vblock" + } else { + set ansibase_key [lindex $opt_ansibase_keys 0] + + lassign [textblock::size $keydisplay] _kw kwidth _kh kheight + lassign [textblock::size $thisval] _vw vwidth _vh vheight + + set totalheight [expr {$kheight + $vheight -1}] + set blanks_above [string repeat \n [expr {$kheight -1}]] + set blanks_below [string repeat \n [expr {$vheight -1}]] + + if {$is_match} { + set use_sep $opt_sep + } else { + set use_sep $opt_mismatch_sep + } + + + set sepwidth [textblock::width $use_sep] + set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] + set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] + set vblock $blanks_above$opt_ansibase_values$thisval$RST + #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace + if {$last_hidekey} { + append result \n + } + append result [textblock::join_basic -- $kblock $sblock $vblock] \n + } + set last_hidekey $hidekey + incr kidx + } + } + "sidebyside" { + #todo + #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. + #use ansibase_key etc to make the output more comprehensible in that situation. + #This is why it is not the default. (review - terminal width detection and wrapping?) + set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] + foreach key $filtered_keys { + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt "%k%" + } + set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST + #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n + #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic + append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n + } + } + } + } + if {$opt_trimright} { + set result [::join [lines_as_list -line trimright $result] \n] + } + if {[string last \n $result] == [string length $result]-1} { + set result [string range $result 0 end-1] + } + #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) + set chan [dict get $argd opts -channel] + switch -- $chan { + stderr - stdout { + puts $chan $result + } + none { + return $result + } + default { + #review - check member of chan names? + #just try outputting to the supplied channel for now + puts $chan $result + } + } + } + + proc is_list_all_in_list {small large} { + package require struct::list + package require struct::set + set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] + return [struct::list equal [lsort $small] $small_in_large] + } + proc is_list_all_ni_list {a b} { + package require struct::set + set i [struct::set intersect $a $b] + return [expr {[llength $i] == 0}] + } + + #somewhat like struct::set difference - but order preserving, and doesn't treat as a 'set' so preserves dupes in fromlist + #struct::set difference may happen to preserve ordering when items are integers, but order can't be relied on, + # especially as struct::set has 2 differing implementations (tcl vs critcl) which return results with different ordering to each other and different deduping behaviour in some cases (e.g empty 2nd arg) + proc ldiff {fromlist removeitems} { + if {[llength $removeitems] == 0} {return $fromlist} + set result {} + foreach item $fromlist { + if {$item ni $removeitems} { + lappend result $item + } + } + return $result + } + proc ldiff2 {fromlist removeitems} { + set doomed [list] + foreach item $removeitems { + lappend doomed {*}[lsearch -all -exact $fromlist $item] + } + lremove $fromlist {*}$doomed + } + + #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B + #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference + #also struct::set difference with critcl is faster + proc setdiff {A B} { + if {[llength $A] == 0} {return {}} + set d [dict create] + foreach x $A {dict set d $x {}} + foreach x $B {dict unset d $x} + return [dict keys $d] + } + #bulk dict remove is slower than a foreach with dict unset + #proc setdiff2 {fromlist removeitems} { + # #if {[llength $fromlist] == 0} {return {}} + # set d [dict create] + # foreach x $fromlist { + # dict set d $x {} + # } + # return [dict keys [dict remove $d {*}$removeitems]] + #} + #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) + proc setdiff_unordered {A B} { + if {[llength $A] == 0} {return {}} + array set tmp {} + foreach x $A {::set tmp($x) .} + foreach x $B {catch {unset tmp($x)}} + return [array names tmp] + } + + package require struct::set + if {[struct::set equal [struct::set union {a a} {}] {a}]} { + proc lunique_unordered {list} { + struct::set union $list {} + } + } else { + puts stderr "WARNING: struct::set union no longer dedupes!" + #we could also test a sequence of: struct::set add + proc lunique_unordered {list} { + tailcall lunique $list + } + } + #order-preserving + proc lunique {list} { + set new {} + foreach item $list { + if {$item ni $new} { + lappend new $item + } + } + return $new + } + proc lunique2 {list} { + set doomed [list] + #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) + for {set i 0} {$i < [llength $list]} {} { + set item [lindex $list $i] + lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] + while {[incr i] in $doomed} {} + } + lremove $list {*}$doomed + } + #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env + proc lmapflat_closure {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + # -- --- --- + #capture - use uplevel 1 or namespace eval depending on context + set capture [uplevel 1 { + apply { varnames { + set capturevars [tcl::dict::create] + set capturearrs [tcl::dict::create] + foreach fullv $varnames { + set v [tcl::namespace::tail $fullv] + upvar 1 $v var + if {[info exists var]} { + if {(![array exists var])} { + tcl::dict::set capturevars $v $var + } else { + tcl::dict::set capturearrs capturedarray_$v [array get var] + } + } else { + #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set + } + } + return [tcl::dict::create vars $capturevars arrs $capturearrs] + } } [info vars] + } ] + # -- --- --- + set cvars [tcl::dict::get $capture vars] + set carrs [tcl::dict::get $capture arrs] + set apply_script "" + foreach arrayalias [tcl::dict::keys $carrs] { + set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { + array set %realname% [set %arrayalias%][unset %arrayalias%] + }] + } + + append apply_script [string map [list %script% $script] { + #foreach arrayalias [info vars capturedarray_*] { + # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + # array set $realname [set $arrayalias][unset arrayalias] + #} + #return [eval %script%] + %script% + }] + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ + $apply_script\ + ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] + } + return $result + } + #link version - can write to vars in calling context - but keeps varnames themselves isolated + #performance much better than capture version - but still a big price to pay for the isolation + proc lmapflat_link {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + #proc lmapflat {varnames list script} { + # concat {*}[uplevel 1 [list lmap $varnames $list $script]] + #} + #lmap can accept multiple var list pairs + proc lmapflat {args} { + concat {*}[uplevel 1 [list lmap {*}$args]] + } + proc lmapflat2 {args} { + concat {*}[uplevel 1 lmap {*}$args] + } + + proc dict_getdef {dictValue args} { + if {[llength $args] < 1} { + error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + } + set keys [lrange $args -1 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + set expression [tcl::string::map {_ {}} $expression] + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {(.*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + + proc lindex_resolve {list index} { + #*** !doctools + #[call [fun lindex_resolve] [arg list] [arg index]] + #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list + #[para]Users may define procs which accept a list index and wish to accept the forms understood by Tcl. + #[para]This means the proc may be called with something like $x+2 end-$y etc + #[para]Sometimes the actual integer index is desired. + #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. + #[para]lindex_resolve will parse the index expression and return: + #[para] a) -2 if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) -1 if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para]Otherwise it will return an integer corresponding to the position in the list. + #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out or bounds indices, or in the case of lrange, return results anyway. + #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + + #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr + #if {![llength $list]} { + # #review + # return ??? + #} + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + if {$index < 0} { + return -2 + } elseif {$index >= [llength $list]} { + return -1 + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } else { + if {[string match end* $index]} { + if {$index ne "end"} { + set op [string index $index 3] + set offset [string range $index 4 end] + if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$op eq "+" && $offset != 0} { + return -1 + } + } else { + #end + set index [expr {[llength $list]-1}] + if {$index < 0} { + #special case - end with empty list - treat end like a positive number out of bounds + return -1 + } else { + return $index + } + } + if {$offset == 0} { + set index [expr {[llength $list]-1}] + if {$index < 0} { + return -1 ;#special case + } else { + return $index + } + } else { + #by now, if op = + then offset = 0 so we only need to handle the minus case + set index [expr {([llength $list]-1) - $offset}] + } + if {$index < 0} { + return -2 + } else { + return $index + } + } else { + #plain +- already handled above. + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { + if {[string is integer -strict $a] && [string is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + if {$index < 0} { + return -2 + } elseif {$index >= [llength $list]} { + return -1 + } + return $index + } + } + } + proc lindex_resolve2 {list index} { + #set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here. + #for {set i 0} {$i < [llength $list]} {incr i} { + # lappend indices $i + #} + if {[llength $list]} { + set indices [punk::lib::range 0 [expr {[llength $list]-1}]] ;# uses lseq if available, has fallback. + } else { + set indices [list] + } + set idx [lindex $indices $index] + if {$idx eq ""} { + return -1 + } else { + return $idx + } + } + proc lindex_get {list index} { + set resultlist [lrange $list $index $index] + if {![llength $resultlist]} { + return -1 + } else { + #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. + #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator + return [tcl::dict::create value [lindex $resultlist 0]] + } + } + + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + + proc is_utf8_multibyteprefix {bytes} { + #*** !doctools + #[call [fun is_utf8_multibyteprefix] [arg str]] + #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character + #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint + #[para] Will return false for an already complete utf-8 codepoint + #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument + #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes + #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] + regexp {(?x) + ^ + (?: + [\xC0-\xDF] | #possible prefix for two-byte codepoint + [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint + [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for + ) + $ + } $bytes + } + + proc is_utf8_first {str} { + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + } $str + } + proc is_utf8_single {1234bytes} { + #*** !doctools + #[call [fun is_utf8_single] [arg 1234bytes]] + #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + $ + } $1234bytes + } + proc get_utf8_leading {rawbytes} { + #*** !doctools + #[call [fun get_utf8_leading] [arg rawbytes]] + #[para] return the leading portion of rawbytes that is a valid utf8 sequence. + #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint + #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. + #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. + #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics + #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned + #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes + if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + \A ( + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + + } $rawbytes completeChars]} { + return $completeChars + } + return "" + } + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set opts [tcl::dict::create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $opts] + foreach {k v} $argopts { + tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + # -- --- --- --- + set opt_validate [tcl::dict::get $opts -validate] + set opt_empty [tcl::dict::get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map [list _ ""] [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map [list _ ""] $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map [list _ ""] $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [tcl::dict::create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $defaults] + set fullopts [tcl::dict::create] + foreach {k v} $argopts { + tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [tcl::dict::merge $defaults $fullopts] + # -- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_case [tcl::dict::get $opts -case] + set opt_empty [tcl::dict::get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + switch -- [string tolower $opt_case] { + upper { + set spec X + } + lower { + set spec x + } + default { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map [list _ ""] [string trim $d]}] + if {![string is digit -strict [string map [list _ ""] $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map [list _ ""] $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 != 0} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2 != 0} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + proc lcm {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the lowest common multiple of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para] + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + tcl::dict::for {next -} $nums break + } + return [concat $primes [tcl::dict::keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [tcl::dict::keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [fconfigure stdin] + if {[catch { + package require punk::console + set console_raw [set ::punk::console::is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + fconfigure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + fconfigure stdin -blocking [tcl::dict::get $stdin_state -blocking] + } + return $answer + } + + #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. + proc indent {text {prefix " "}} { + set result [list] + foreach line [split $text \n] { + if {[string trim $line] eq ""} { + lappend result "" + } else { + lappend result $prefix[string trimright $line] + } + } + return [join $result \n] + } + proc undent {text} { + if {$text eq ""} { + return "" + } + set lines [split $text \n] + set nonblank [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + continue + } + lappend nonblank $ln + } + set lcp [longestCommonPrefix $nonblank] + if {$lcp eq ""} { + return $text + } + regexp {^([\t ]*)} $lcp _m lcp + if {$lcp eq ""} { + return $text + } + set len [string length $lcp] + set result [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + lappend result "" + } else { + lappend result [string range $ln $len end] + } + } + return [join $result \n] + } + #A version of textutil::string::longestCommonPrefixList + proc longestCommonPrefix {items} { + if {[llength $items] <= 1} { + return [lindex $items 0] + } + set items [lsort $items[unset items]] + set min [lindex $items 0] + set max [lindex $items end] + #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) + #(sort order nothing to do with length - e.g min may be longer than max) + if {[string length $min] > [string length $max]} { + set temp $min + set min $max + set max $temp + } + set n [string length $min] + set prefix "" + set i -1 + while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { + append prefix $c + } + return $prefix + } + #test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var + proc swapnumvars {namea nameb} { + upvar $namea a $nameb b + set a [expr {$a ^ $b}] + set b [expr {$a ^ $b}] + set a [expr {$a ^ $b}] + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joines the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? + lassign [tcl::dict::values [punk::args::get_dict { + -joinchar -default \n + *values -min 1 -max 1 + } $args]] opts values + puts "opts:$opts" + puts "values:$values" + return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [tcl::dict::merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + lappend opts -block {} + } + set text [lindex $args end] + tailcall linelist {*}$opts $text + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [tcl::dict::values [punk::args::get_dict { + *opts -any 1 + -block -default {} + } $args]] opts valuedict + tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + proc linelist {args} { + set usage "linelist ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map [list \r\n \n] $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {[set posn [lsearch $opt_block trimtail1]] >=0 && "trimtail" in $opt_block} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + package require punk::ansi + if {$opt_ansiresets} { + set RST [punk::ansi::a] + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) + if {![punk::ansi::ta::detect_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + set ansisplits [punk::ansi::ta::split_codes_single $ln] + if {[llength $ansisplits]<= 1} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[lindex $ansisplits end] eq ""} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach {pt code} $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + + + interp alias {} errortime {} punk::lib::errortime + proc errortime {script groupsize {iters 2}} { + #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance + set i 0 + set times {} + if {$iters < 2} {set iters 2} + + for {set i 0} {$i < $iters} {incr i} { + set result [uplevel [list time $script $groupsize]] + lappend times [lindex $result 0] + } + + set average 0.0 + set s2 0.0 + + foreach time $times { + set average [expr {$average + double($time)/$iters}] + } + + foreach time $times { + set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] + } + + set sigma [expr {int(sqrt($s2))}] + set average [expr int($average)] + + return "$average +/- $sigma microseconds per iteration" + } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version + proc switch_char_test {c} { + set dec [scan $c %c] + foreach t [list 1 2 3] { + switch -- $c { + x { + return [list $dec x $t] + } + y { + return [list $dec y $t] + } + z { + return [list $dec z $t] + } + } + } + + #tcl 8.6/8.7 (at least) + #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable + switch -- $c { + a { + return [list $dec a] + } + {"} { + return [list $dec dquote] + } + {[} {return [list $dec lb]} + {]} {return [list $dec rb]} + "{" { + return [list $dec lbrace] + } + "}" { + return [list $dec rbrace] + } + default { + return [list $dec $c] + } + } + + + + } + + #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) + proc show_jump_tables {args} { + #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. + if {[llength $args] == 1} { + set data [tcl::unsupported::disassemble proc [lindex $args 0]] + } elseif {[llength $args] == 2} { + #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. + #not sure if this handles more complex hierarchies or mixins etc. + lassign $args obj method + if {![info object isa object $obj]} { + error "show_jump_tables unable to examine '$args'. $obj is not an oo object" + } + #classes are objects too and can have direct methods + if {$method in [info object methods $obj]} { + set data [tcl::unsupported::disassemble objmethod $obj $method] + } else { + if {![info object isa class $obj]} { + set obj [info object class $obj] + } + set data [tcl::unsupported::disassemble method $obj $method] + } + } else { + error "show_jump_tables expected a procname or a class/object and method" + } + set result "" + set in_jt 0 + foreach ln [split $data \n] { + set tln [string trim $ln] + if {!$in_jt} { + if {[string match *jumpTable* $ln]} { + append result $ln \n + set in_jt 1 + } + } else { + if {[string match Command* $tln] || [string match "(*) *" $tln]} { + set in_jt 0 + } else { + append result $ln \n + } + } + } + return $result + } + + proc temperature_f_to_c {deg_fahrenheit} { + return [expr {($deg_fahrenheit -32) * (5/9.0)}] + } + proc temperature_c_to_f {deg_celsius} { + return [expr {($deg_celsius * (9/5.0)) + 32}] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + proc has_script_var_bug {} { + set script {set j [list spud] ; list} + append script \n + uplevel #0 $script + set rep1 [tcl::unsupported::representation $::j] + set script "" + set rep2 [tcl::unsupported::representation $::j] + + set nostring1 [string match "*no string" $rep1] + set nostring2 [string match "*no string" $rep2] + + #we assume it should have no string rep in either case + #Review: check Tcl versions for behaviour/consistency + if {!$nostring2} { + return true + } else { + return false + } + } + proc has_safeinterp_compile_bug {{show 0}} { + #ensemble calls within safe interp not compiled + namespace eval [namespace current]::testcompile { + proc ensembletest {} {string index a 0} + } + + set has_bug 0 + + set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] + if {$show} { + puts outer: + puts $bytecode_outer + } + if {![interp issafe]} { + #test of safe subinterp only needed if we aren't already in a safe interp + if {![catch { + interp create x -safe + } errMsg]} { + x eval {proc ensembletest {} {string index a 0}} + set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] + if {$show} { + puts safe: + puts $bytecode_safe + } + interp delete x + #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) + #It's possible the interp we're running in is also not compiling ensembles. + #we could then get a result of 2 - which still indicates a problem + if {[string last "invokeStk" $bytecode_safe] >= 1} { + incr has_bug + } + } else { + #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? + #unlikely - but we should warn + puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" + } + } + + namespace delete [namespace current]::testcompile + + if {[string last "invokeStk" $bytecode_outer] >= 1} { + incr has_bug + } + return $has_bug + } + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + # incomplte - report which is the innermost bracket/quote etc awaiting completion for a Tcl command + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + set i 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + incr i + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) + switch -- $c { + {"} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } + {[} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "{" { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "}" - + default { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + incr i + } + set incomplete [list] + foreach w $waiting { + #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. + switch -- $w { + {"} { + lappend incomplete $w + } + {]} { + lappend incomplete "\[" + } + "{" {} + "}" { + lappend incomplete "\{" + } + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->awaiting:'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [tcl::namespace::eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix-0.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix-0.2.tm new file mode 100644 index 00000000..24ef156c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix-0.2.tm @@ -0,0 +1,32 @@ + +package require punk::cap + + +tcl::namespace::eval punk::mix { + proc init {} { + package require punk::cap::handlers::templates ;#handler for templates cap + punk::cap::register_capabilityname punk.templates ::punk::cap::handlers::templates ;#time taken should generally be sub 200us + + package require punk::mix::templates ;#registers as provider pkg for 'punk.templates' capability with punk::cap + set t [time { + if {[catch {punk::mix::templates::provider register *} errM]} { + puts stderr "punk::mix failure during punk::mix::templates::provider register *" + puts stderr $errM + puts stderr "-----" + puts stderr $::errorInfo + } + }] + puts stderr "->punk::mix::templates::provider register * t=$t" + } + init + +} + +package require punk::mix::base +package require punk::mix::cli + +package provide punk::mix [tcl::namespace::eval punk::mix { + variable version + set version 0.2 + +}] diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm new file mode 100644 index 00000000..8a4456d1 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/base-0.1.tm @@ -0,0 +1,931 @@ +package provide punk::mix::base [namespace eval punk::mix::base { + variable version + set version 0.1 +}] + +package require punk::path + +#base internal plumbing functions +namespace eval punk::mix::base { + proc set_alias {cmdname args} { + #--------- + #extension@@opts/@?@-extension,args@@args= [_split_args $args] ;#dependency on punk pipeline/patternmatching system + lassign [_split_args $args] _opts opts _args args + if {[dict exists $opts -extension]} { + set extension [dict get $opts -extension] + } else { + set extension "" + } + #--------- + + uplevel #0 [list interp alias {} $cmdname {} punk::mix::base::_cli -extension $extension] + } + proc _cli {args} { + #--------- + #extension@@opts/@?@-extension,args@@args= [_split_args $args] ;#dependency on punk pipeline/patternmatching system + lassign [_split_args $args] _opts opts _args args + if {[dict exists $opts -extension]} { + set extension [dict get $opts -extension] + } else { + set extension "" + } + #--------- + if {![string length $extension]} { + set extension [namespace qualifiers [lindex [info level -1] 0]] + } + #puts stderr "punk::mix::base extension: [string trimleft $extension :]" + if {![string length $extension]} { + #if still no extension - must have been called dirctly as punk::mix::base::_cli + if {![llength $args]} { + set args "help" + } + set extension [namespace current] + } + if {![llength $args]} { + if {[info exists ${extension}::default_command]} { + tailcall $extension [set ${extension}::default_command] + } + tailcall $extension + } else { + tailcall $extension {*}$args + } + } + proc _unknown {ns args} { + #--------- + #extension@@opts/@?@-extension,args@@args= [_split_args $args] ;#dependency on punk pipeline/patternmatching system + lassign [_split_args $args] _opts opts _args args + if {[dict exists $opts -extension]} { + set extension [dict get $opts -extension] + } else { + set extension "" + } + #--------- + + if {![string length $extension]} { + set extension [namespace qualifiers [lindex [info level -1] 0]] + } + #puts stderr "arglen:[llength $args]" + #puts stdout "_unknown '$ns' '$args'" + + set d_commands [get_commands -extension $extension] + set all_commands [list {*}[dict get $d_commands main] {*}[dict get $d_commands base]] + + + error "Unknown subcommand \"[lindex $args 0]\": must be one of: $all_commands" "punk::mix::base _unknown $ns $args" [list unknown_ensemble_subcommand ensemble punk::mix::base] + } + proc _redirected {from_ns subcommand args} { + #puts stderr "_redirected from_ns: $from_ns subcommand:$subcommand args:$args" + set pname [namespace current]::$subcommand + if {$pname in [info procs $pname]} { + set argnames [info args $pname] + #puts stderr "_redirected $subcommand argnames: $argnames" + if {[lindex $argnames end] eq "args"} { + set pos_argnames [lrange $argnames 0 end-1] + } else { + set pos_argnames $argnames + } + set argvals [list] + set numargs [llength $pos_argnames] + if {$numargs > 0} { + set argvals [lrange $args 0 $numargs-1] + set args [lrange $args $numargs end] + } + if {[llength $argvals] < $numargs} { + error "wrong # args: $from_ns $subcommand requires args: $pos_argnames" + } + tailcall [namespace current] $subcommand {*}$argvals {*}$args -extension $from_ns + } else { + if {[regexp {.*[*?].*} $subcommand]} { + set d_commands [get_commands -extension $from_ns] + set all_commands [list {*}[dict get $d_commands main] {*}[dict get $d_commands base]] + set matched_commands [lsearch -all -inline $all_commands $subcommand] + set commands "" + foreach m $matched_commands { + append commands $m \n + } + return $commands + } + tailcall [namespace current] $subcommand {*}$args -extension $from_ns + } + } + proc _split_args {arglist} { + #don't assume arglist is fully paired. + set posn [lsearch $arglist -extension] + set opts [list] + if {$posn >= 0} { + if {$posn+2 <= [llength $arglist]} { + set opts [list -extension [lindex $arglist $posn+1]] + set argsremaining [lreplace $arglist $posn $posn+1] + } else { + #no value supplied to -extension + error "punk::mix::base::_split_args - no value found for option '-extension'. Supply a value or omit the option." + } + } else { + set argsremaining $arglist + } + + return [list opts $opts args $argsremaining] + } +} + + +#base API (potentially overridden functions - may also be called from overriding namespace) +#commands should either handle or silently ignore -extension +namespace eval punk::mix::base { + namespace ensemble create + namespace export help dostuff get_commands set_alias + namespace ensemble configure [namespace current] -unknown punk::mix::base::_unknown + proc get_commands {args} { + #--------- + #extension@@opts/@?@-extension,args@@args= [_split_args $args] ;#dependency on punk pipeline/patternmatching system + lassign [_split_args $args] _opts opts _args args + if {[dict exists $opts -extension]} { + set extension [dict get $opts -extension] + } else { + set extension "" + } + #--------- + if {![string length $extension]} { + set extension [namespace qualifiers [lindex [info level -1] 0]] + } + + set maincommands [list] + #extension may still be blank e.g if punk::mix::base::get_commands called directly + if {[string length $extension]} { + set nsmain $extension + #puts stdout "get_commands nsmain: $nsmain" + set parentpatterns [namespace eval $nsmain [list namespace export]] + set nscommands [list] + foreach p $parentpatterns { + lappend nscommands {*}[info commands ${nsmain}::$p] + } + foreach c $nscommands { + set cmd [namespace tail $c] + lappend maincommands $cmd + } + set maincommands [lsort $maincommands] + } + + + + + set nsbase [namespace current] + set basepatterns [namespace export] + #puts stdout "basepatterns:$basepatterns" + set nscommands [list] + foreach p $basepatterns { + lappend nscommands {*}[info commands ${nsbase}::$p] + } + + set basecommands [list] + foreach c $nscommands { + set cmd [namespace tail $c] + if {$cmd ni $maincommands} { + lappend basecommands $cmd + } + } + set basecommands [lsort $basecommands] + + + return [list main $maincommands base $basecommands] + } + proc help {args} { + #' **%ensemblecommand% help** *args* + #' + #' Help for ensemble commands in the command line interface + #' + #' + #' Arguments: + #' + #' * args - first word of args is the helptopic requested - usually a command name + #' - calling help with no arguments will list available commands + #' + #' Returns: help text (text) + #' + #' Examples: + #' + #' ``` + #' %ensemblecommand% help + #' ``` + #' + #' + + + #extension.= @@opts/@?@-extension,args@@args=>. [_split_args $args] {| + # >} inspect -label a {| + # >} .=e>end,data>end pipeswitch { + # pipecase ,0/1/#= $switchargs {| + # e/0 + # >} .=>. {set e} + # pipecase /1,1/1/#= $switchargs + #} |@@ok/result> $args" + set helpstr "" + append helpstr "limit commandlist with a glob search such as *word*" + append helpstr "commands:\n" + + foreach {source cmdlist} $command_info { + append helpstr \n " $source" + foreach cmd $cmdlist { + append helpstr \n " - $cmd" + } + } + return $helpstr + } + #proc dostuff {args} { + # extension@@opts/@?@-extension,args@@args= [_split_args $args] + # puts stdout "base doingstuff-with-args:'$args'-in-namespace:'[namespace current]'" + #} + namespace eval lib { + variable sha3_implementation "" ;#set by cksum_algorithms (which is called by cksum_path) It looks for fossil or sqlite3. Todo - add proper Tcl implementation. + namespace export * + + #----------------------------------------------------- + #literate-programming style naming for some path tests + #Note the naming of the operator portion of a_op_b is consistent in that it is the higher side of the filesystem tree first. + #hence aboveorat vs atorbelow + #These names also sort in the logical order of higher to lower in the filesystem (when considering the root as 'higher' in the filesystem) + proc path_a_above_b {path_a path_b} { + #stripPath prefix path + return [expr {[fileutil::stripPath $path_a $path_b] ni [list . $path_b]}] + } + proc path_a_aboveorat_b {path_a path_b} { + return [expr {[fileutil::stripPath $path_a $path_b] ne $path_b}] + } + proc path_a_at_b {path_a path_b} { + return [expr {[fileutil::stripPath $path_a $path_b] eq "." }] + } + proc path_a_atorbelow_b {path_a path_b} { + return [expr {[fileutil::stripPath $path_b $path_a] ne $path_a}] + } + proc path_a_below_b {path_a path_b} { + return [expr {[fileutil::stripPath $path_b $path_a] ni [list . $path_a]}] + } + proc path_a_inlinewith_b {path_a path_b} { + return [expr {[path_a_aboveorat_b $path_a $path_b] || [path_a_below_b $path_a $path_b]}] + } + #----------------------------------------------------- + + + + #find src/something folders which are not certain known folders with other purposes, (such as: bootsupport .vfs folders or vendor folders etc) and contain .tm file(s) + proc find_source_module_paths {{path {}}} { + if {![string length [set candidate [punk::repo::find_candidate $path]]]} { + error "find_source_module_paths cannot determine a suitable project root at or above path '$path' - path supplied should be within a project" + } + #we can return module paths even if the project isn't yet under revision control + set src_subs [glob -nocomplain -dir [file join $candidate src] -type d -tail *] + set antipatterns [list *.vfs vendor* lib _build doc embedded runtime bootsupport] + set tm_folders [list] + foreach sub $src_subs { + set is_ok 1 + foreach anti $antipatterns { + if {[string match $anti $sub]} { + set is_ok 0 + break + } + } + if {!$is_ok} { + continue + } + set testfolder [file join $candidate src $sub] + #ensure that if src/modules exists - it is always included even if empty + if {[string tolower $sub] eq "modules"} { + lappend tm_folders $testfolder + continue + } + #set tmfiles [glob -nocomplain -dir $testfolder -type f -tail *.tm] + #set podfolders [glob -nocomplain -dir $testfolder -type d -tail #modpod-*] + if {[llength [glob -nocomplain -dir $testfolder -type f -tail *.tm]] || [llength [glob -nocomplain -dir $testfolder -type d -tail #modpod-*]]} { + lappend tm_folders $testfolder + } + } + return $tm_folders + } + + proc mix_templates_dir {} { + puts stderr "mix_templates_dir WARNING: deprecated - use get_template_basefolders instead" + set provide_statement [package ifneeded punk::mix [package require punk::mix]] + set tmdir [file dirname [lindex $provide_statement end]] + set tpldir $tmdir/mix/templates + if {![file exists $tpldir]} { + error "punk::mix::lib::mix_templates_dir unable to locate mix templates folder at '$tpldir'" + } + return $tpldir + } + + #get_template_basefolders + # startpath - file or folder + # It represents the base point from which to search for templates folders either directly related to the scriptpath (../) or in the containing project if any + # The cwd will also be searched for project root - but with lower precedence in the resultset (later in list) + proc get_template_basefolders {{startpath ""}} { + # templates from punk.templates provider packages (ordered by order in which packages registered with punk::cap) + if {[file isfile $startpath]} { + set startpath [file dirname $startpath] + } + + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + set template_folder_dict [punk::cap::call_handler punk.templates folders -startdir $startpath] + } else { + put stderr "get_template_basefolders WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide template locations" + } + + #don't sort - order in which encountered defines the precedence - with later overriding earlier + return $template_folder_dict + } + + proc module_subpath {modulename} { + set modulename [string trim $modulename :] + set nsq [namespace qualifiers $modulename] + return [string map {:: /} $nsq] + } + + proc get_build_workdir {path} { + set repo_info [punk::repo::find_repos $path] + set base [lindex [dict get $repo_info project] 0] + if {![string length $base]} { + error "get_build_workdir unable to determine project base for path '$path'" + } + if {![file exists $base/src] || ![file writable $base/src]} { + error "get_build_workdir unable to access $base/src" + } + file mkdir $base/src/_build + return $base/src/_build + } + + + + #todo - move cksum stuff to punkcheck - more logical home + proc cksum_path_content {path args} { + dict set args -cksum_content 1 + dict set args -cksum_meta 0 + tailcall cksum_path $path {*}$args + } + + #not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through + variable cksum_default_opts + set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1] + proc cksum_default_opts {} { + variable cksum_default_opts + return $cksum_default_opts + } + + #crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?) + # - try builtin zlib crc instead? + #sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration. + #adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?) + #sha1 as at 2023 seems a reasonable default + proc cksum_algorithms {} { + variable sha3_implementation + #sha2 is an alias for sha256 + #2023 - no sha3 available in tcllib - we can exec fossil for now - which will be very slow + set algs [list md5 sha1 sha2 sha256 cksum adler32] + set sha3_algs [list sha3 sha3-224 sha3-256 sha3-384 sha3-512] + if {[auto_execok sqlite3] ne ""} { + lappend algs {*}$sha3_algs + set sha3_implementation sqlite3_sha3 + } else { + if {[auto_execok fossil] ne ""} { + lappend algs {*}$sha3_algs + set sha3_implementation fossil_sha3 + } + } + return $algs + } + + proc sqlite3_sha3 {bits filename} { + return [exec sqlite3 :memory: "select lower(hex(sha3(readfile('$filename'),$bits)))"] + } + proc fossil_sha3 {bits filename} { + return [lindex [exec fossil sha3sum -$bits $filename] 0] + } + + #adler32 via file-slurp + proc cksum_adler32_file {filename} { + package require zlib; #should be builtin anyway + set data [punk::mix::util::fcat -translation binary -encoding iso8859-1 $filename] + #set data [fileutil::cat -translation binary $filename] ;#no significant performance diff on windows - and doesn't handle win-illegal names + zlib adler32 $data + } + #zlib crc vie file-slurp + proc cksum_crc_file {filename} { + package require zlib + set data [punk::mix::util::fcat -translation binary -encoding iso8859-1 $filename] + zlib crc $data + } + + + #required to be able to accept relative paths + #for full cksum - using tar could reduce number of hashes to be made.. + #but as it stores metadata such as permission - we don't know if/how the archive will vary based on platform/filesystem + #-noperms only available on extraction - so that doesn't help + #Needs to operate on non-existant paths and return empty string in cksum field + proc cksum_path {path args} { + variable sha3_implementation + if {$path eq {}} { set path [pwd] } + if {[file pathtype $path] eq "relative"} { + set path [file normalize $path] + } + set base [file dirname $path] + set startdir [pwd] + + set defaults [cksum_default_opts] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "cksum_path unknown option '$k' known_options: $known_opts" + } + } + + set opts [dict merge $defaults $args] + set opts_actual $opts ;#default - auto updated to 0 or 1 later + + #if {![file exists $path]} { + # return [list cksum "" opts $opts] + #} + + if {[catch {file type $path} ftype]} { + return [list cksum "" opts $opts] + } + + #review - links? + switch -- $ftype { + file - directory {} + default { + error "cksum_path error file type '$ftype' not supported" + } + } + + + set opt_cksum_algorithm [dict get $opts -cksum_algorithm] + if {$opt_cksum_algorithm ni [cksum_algorithms]} { + return [list error unsupported_cksum_algorithm cksum "" opts $opts] + } + set opt_cksum_acls [dict get $opts -cksum_acls] + if {$opt_cksum_acls} { + puts stderr "cksum_path is not yet able to cksum ACLs" + return + } + + set opt_cksum_meta [dict get $opts -cksum_meta] + set opt_use_tar [dict get $opts -cksum_usetar] + switch -- $ftype { + file { + switch -- $opt_use_tar { + auto { + if {$opt_cksum_meta eq "1"} { + set opt_use_tar 1 + } else { + #prefer no tar if meta not required - faster/simpler + #meta == auto or 0 + set opt_cksum_meta 0 + set opt_use_tar 0 + } + } + 0 { + if {$opt_cksum_meta eq "1"} { + puts stderr "cksum_path doesn't yet support a non-tar cksum with metadata for a file" + return [list error unsupported_meta_without_tar cksum "" opts $opts] + } else { + #meta == auto or 0 + set opt_cksum_meta 0 + } + } + default { + #tar == 1 + if {$opt_cksum_meta eq "0"} { + puts stderr "cksum_path doesn't yet support a tar cksum without metadata for a file" + return [list error unsupported_tar_without_meta cksum "" opts $opts] + } else { + #meta == auto or 1 + set opt_cksum_meta 1 + } + } + } + } + directory { + switch -- $opt_use_tar { + auto { + if {$opt_cksum_meta in [list "auto" "1"]} { + set opt_use_tar 1 + set opt_cksum_meta 1 + } else { + puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto" + return [list error unsupported_directory_cksum_without_meta cksum "" opts $opts] + } + } + 0 { + puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto" + return [list error unsupported_directory_cksum_without_tar cksum "" opts $opts] + } + default { + #tar 1 + if {$opt_cksum_meta eq "0"} { + puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto" + return [list error unsupported_without_meta cksum "" opts $opts] + } else { + #meta == auto or 1 + set opt_cksum_meta 1 + } + } + } + } + } + + dict set opts_actual -cksum_meta $opt_cksum_meta + dict set opts_actual -cksum_usetar $opt_use_tar + + + if {$opt_use_tar} { + package require tar ;#from tcllib + } + + if {$path eq $base} { + #attempting to cksum at root/volume level of a filesystem.. extra work + #This needs fixing for general use.. not necessarily just for project repos + puts stderr "cksum_path doesn't yet support cksum of entire volume. (todo)" + return [list error unsupported_path opts $opts] + } + switch -- $opt_cksum_algorithm { + sha1 { + package require sha1 + #review - any utf8 issues in tcl9? + set cksum_command [list sha1::sha1 -hex -file] + } + sha2 - sha256 { + package require sha256 + set cksum_command [list sha2::sha256 -hex -file] + } + md5 { + package require md5 + set cksum_command [list md5::md5 -hex -file] + } + cksum { + package require cksum ;#tcllib + set cksum_command [list crc::cksum -format 0x%X -file] + } + crc { + set cksum_command [list cksum_crc_file] + } + adler32 { + set cksum_command [list cksum_adler32_file] + } + sha3 - sha3-256 { + #todo - replace with something that doesn't call another process + #set cksum_command [list apply {{file} {lindex [exec fossil sha3sum -256 $file] 0}}] + set cksum_command [list $sha3_implementation 256] + } + sha3-224 - sha3-384 - sah3-512 { + set bits [lindex [split $opt_cksum_algorithm -] 1] + #set cksum_command [list apply {{bits file} {lindex [exec fossil sha3sum -$bits $file] 0}} $bits] + set cksum_command [list $sha3_implementation $bits] + } + } + + set cksum "" + if {$opt_use_tar != 0} { + set target [file tail $path] + set tmplocation [punk::mix::util::tmpdir] + set archivename $tmplocation/[punk::mix::util::tmpfile].tar + + cd $base ;#cd is process-wide.. keep cd in effect for as small a scope as possible. (review for thread issues) + + #temp emission to stdout.. todo - repl telemetry channel + puts stdout "cksum_path: creating temporary tar archive for $path" + puts stdout " at: $archivename .." + tar::create $archivename $target + if {$ftype eq "file"} { + set sizeinfo "(size [file size $target])" + } else { + set sizeinfo "(file type $ftype - size unknown)" + } + puts stdout "cksum_path: calculating cksum for $target $sizeinfo..." + set cksum [{*}$cksum_command $archivename] + #puts stdout "cksum_path: cleaning up.. " + file delete -force $archivename + cd $startdir + + } else { + #todo + if {$ftype eq "file"} { + if {$opt_cksum_meta} { + return [list error unsupported_opts_combo cksum "" opts $opts] + } else { + set cksum [{*}$cksum_command $path] + } + } else { + error "cksum_path unsupported $opts for path type [file type $path]" + } + } + set result [dict create] + dict set result cksum $cksum + dict set result opts $opts_actual + return $result + } + + #dict_path_cksum keyed on path - with value as a dict that must contain cksum key - but can contain other keys + #e.g -cksum_usetar which is one of the keys understood by the punk::mix::base::lib::cksum_path function - or unrelated keys which will also be passed through + #cksum only calculated for keys in dict where cksum is empty - ie return same dict but with empty cksums filled out. + #base can be empty string in which case paths must be absolute + proc fill_relativecksums_from_base_and_relativepathdict {base {dict_path_cksum {}}} { + if {$base eq ""} { + set error_paths [list] + dict for {path pathinfo} $dict_path_cksum { + if {[file pathtype $path] ne "absolute"} { + lappend error_paths $path + } + } + if {[llength $error_paths]} { + puts stderr "get_relativecksums_from_base_and_relativepathdict has empty base - and non-absolute paths in the supplied checksum dict - aborting" + puts stderr "error_paths: $error_paths" + error "fill_relativecksums_from_base_and_relativepathdict error: non-absolute paths when base empty. $error_paths" + } + } else { + if {[file pathtype $base] ne "absolute"} { + error "fill_relativecksums_from_base_and_relativepathdict error: base supplied but was not absolute path. $base" + } + #conversely now we have a base - so we require all paths are relative. + #We will ignore/disallow volume-relative - as these shouldn't be used here either + set error_paths [list] + dict for {path pathinfo} $dict_path_cksum { + if {[file pathtype $path] ne "relative"} { + lappend error_paths $path + } + } + if {[llength $error_paths]} { + puts stderr "fill_relativecksums_from_base_and_relativepathdict has a supplied absolute base path, but some of the paths in the supplied dict are not relative - aborting" + error "fill_relativecksums_from_base_and_relativepathdict error: non-relative paths when base supplied. $error_paths" + } + } + + + dict for {path pathinfo} $dict_path_cksum { + if {![dict exists $pathinfo cksum]} { + dict set pathinfo cksum "" + } else { + if {[dict get $pathinfo cksum] ne "" && ![cksum_is_tag [dict get $pathinfo cksum]]} { + continue ;#already filled with non-tag value + } + } + if {$base ne ""} { + set fullpath [file join $base $path] + } else { + set fullpath $path + } + + set ckopts [cksum_filter_opts {*}$pathinfo] + + if {![file exists $fullpath]} { + dict set dict_path_cksum $path cksum "" + } else { + set ckinfo [cksum_path $fullpath {*}$ckopts] + dict set dict_path_cksum $path cksum [dict get $ckinfo cksum] + dict set dict_path_cksum $path cksum_all_opts [dict get $ckinfo opts] + if {[dict exists $ckinfo error]} { + dict set dict_path_cksum $path cksum_error [dict get $ckinfo error] + } + } + } + + return $dict_path_cksum + } + #whether cksum is e.g + proc cksum_is_tag {cksum} { + expr {[string index $cksum 0] eq "<" && [string index $cksum end] eq ">"} + } + proc cksum_filter_opts {args} { + set ck_opt_names [dict keys [cksum_default_opts]] + set ck_opts [dict create] + foreach {k v} $args { + if {$k in $ck_opt_names} { + dict set ck_opts $k $v + } + } + return $ck_opts + } + + #convenience so caller doesn't have to pre-calculate the relative path from the base + #Note semantic difference from fill_relativecksums_from_base_and_relativepathdict (hence get_ vs fill_) + #Here we will raise an error if cksum exists and is not empty or a tag - whereas the multiple path version will ignore valid-looking prefilled cksum values + #base is the presumed location to store the checksum file. The caller should retain (normalize if relative) + proc get_relativecksum_from_base {base specifiedpath args} { + if {$base ne ""} { + #targetpath ideally should be within same project tree as base if base supplied - but not necessarily below it + #we don't necessarily want to restrict this to use in punk projects though - so we'll allow anything with a common prefix + if {[file pathtype $specifiedpath] eq "relative"} { + if {[file pathtype $base] eq "relative"} { + set normbase [file normalize $base] + set normtarg [file normalize [file join $normbase $specifiedpath]] + set targetpath $normtarg + set storedpath [punk::path::relative $normbase $normtarg] + } else { + set targetpath [file join $base $specifiedpath] + set storedpath $specifiedpath + } + } else { + #specifed absolute + if {[file pathtype $base] eq "relative"} { + #relative to cwd or to specifiedpath? For consistency it should arguably be cwd but a case could be made that when one path is relative it is in reference to the other + #there is a strong possibility that allowing this combination will cause confusion - better to disallow + error "get_relativecksum_from_base error: disallowed pathtype combination. Base must be empty or absolute when specified path is absolute" + } + #both absolute - compute relative path if they share a common prefix + set commonprefix [punk::mix::util::path_common_prefix $base $specifiedpath] + if {$commonprefix eq ""} { + #absolute base with no shared prefix doesn't make sense - we could ignore it - but better to error-out and require the caller specify an empty base + error "get_relativecksum_from_base error: base '$base' and specifiedpath '$specifiedpath' don't share a common root. Use empty-string for base if independent absolute path is required" + } + set targetpath $specifiedpath + set storedpath [punk::path::relative $base $specifiedpath] + + } + } else { + if {[file type $specifiedpath] eq "relative"} { + #if specifiedpath is relative - and we don't have a base, we now need to convert relative to cwd to an absolute path for storage + set targetpath [file normalize $specifiedpath] + set storedpath $targetpath + } else { + set targetpath $specifiedpath + set storedpath $targetpath + } + } + + # + #NOTE: specifiedpath can be a relative path (to cwd) when base is empty + #OR - a relative path when base itself is relative e.g base: somewhere targetpath somewhere/etc + #possibly also: base: somewhere targetpath: ../elsewhere/etc + # + #todo - write tests + + + if {([llength $args] % 2) != 0} { + error "get_relativecksum_from_base error. args supplied must be in the form of key-value pairs. received '$args' " + } + if {[dict exists $args cksum]} { + if {[dict get $args cksum] ne "" && ![cksum_is_tag [dict get $args cksum]]} { + error "get_relativecksum_from_base called with existing cksum value (and is not a tag or empty-value to be replaced) cksum: [dict get $args cksum] Set cksum to be empty, any tag such as or remove the key and try again." + } + } + + + set ckopts [cksum_filter_opts {*}$args] + set ckinfo [cksum_path $targetpath {*}$ckopts] + + set keyvals $args + dict set keyvals cksum [dict get $ckinfo cksum] + dict set keyvals cksum_all_opts [dict get $ckinfo opts] + if {[dict exists $ckinfo error]} { + dict set keyvals cksum_error [dict get $ckinfo error] + } + + #set relpath [punk::repo::path_strip_alreadynormalized_prefixdepth $fullpath $base] ;#empty base ok noop + #storedpath is relative if possible + return [dict create $storedpath $keyvals] + } + + #calculate the runtime checksum and vfs checksums + proc get_all_vfs_build_cksums {path} { + set buildfolder [get_build_workdir $path] + set cksum_base_folder [file dirname $buildfolder] ;#this is the /src folder - a reasonable base for our vfs cksums + set dict_cksums [dict create] + + set buildrelpath [punk::repo::path_strip_alreadynormalized_prefixdepth $buildfolder $cksum_base_folder] + set vfs_tail_list [glob -nocomplain -dir $cksum_base_folder -type d -tails *.vfs] + + foreach vfstail $vfs_tail_list { + set vname [file rootname $vfstail] + dict set dict_cksums $vfstail [list cksum ""] + dict set dict_cksums [file join $buildrelpath $vname.exe] [list cksum ""] + } + + set fullpath_buildruntime $buildfolder/buildruntime.exe + + set ckinfo_buildruntime [cksum_path $fullpath_buildruntime] + set ck [dict get $ckinfo_buildruntime cksum] + + + set relpath [file join $buildrelpath "buildruntime.exe"] + dict set dict_cksums $relpath [list cksum $ck] + + set dict_cksums [fill_relativecksums_from_base_and_relativepathdict $cksum_base_folder $dict_cksums] + + return $dict_cksums + } + + proc get_vfs_build_cksums_stored {vfsfolder} { + set vfscontainer [file dirname $vfsfolder] + set buildfolder $vfscontainer/_build + set vfs [file tail $vfsfolder] + set vname [file rootname $vfs] + set dict_vfs [list $vname.vfs "" $vname.exe "" buildruntime.exe ""] + set ckfile $buildfolder/$vname.cksums + if {[file exists $ckfile]} { + set data [punk::mix::util::fcat -translation binary $ckfile] + foreach ln [split $data \n] { + if {[string trim $ln] eq ""} {continue} + lassign $ln path cksum + dict set dict_vfs $path $cksum + } + } + return $dict_vfs + } + proc get_all_build_cksums_stored {path} { + set buildfolder [get_build_workdir $path] + + set vfscontainer [file dirname $buildfolder] + set vfslist [glob -nocomplain -dir $vfscontainer -type d -tail *.vfs] + set dict_cksums [dict create] + foreach vfs $vfslist { + set vname [file rootname $vfs] + set dict_vfs [get_vfs_build_cksums_stored $vfscontainer/$vfs] + + dict set dict_cksums $vname $dict_vfs + } + return $dict_cksums + } + + proc store_vfs_build_cksums {vfsfolder} { + if {![file isdirectory $vfsfolder]} { + error "Unable to find supplied vfsfolder: $vfsfolder" + } + set vfscontainer [file dirname $vfsfolder] + set buildfolder $vfscontainer/_build + set dict_vfs [get_vfs_build_cksums $vfsfolder] + set data "" + dict for {path cksum} $dict_vfs { + append data "$path $cksum" \n + } + set fd [open $buildfolder/$vname.cksums w] + chan configure $fd -translation binary + puts $fd $data + close $fd + return $dict_vfs + } + + + + } +} diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm new file mode 100644 index 00000000..5843789f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.1.tm @@ -0,0 +1,1128 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'deck make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::cli 0.3.1 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::repo +package require punk::ansi +package require punkcheck ;#checksum and/or timestamp records + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#review +#deck - rename to dev +namespace eval punk::mix::cli { + namespace eval temp_import { + } + namespace ensemble create + + package require punk::overlay + catch { + punk::overlay::import_commandset module . ::punk::mix::commandset::module + } + punk::overlay::import_commandset debug . ::punk::mix::commandset::debug + punk::overlay::import_commandset repo . ::punk::mix::commandset::repo + punk::overlay::import_commandset lib . ::punk::mix::commandset::loadedlib + + catch { + package require punk::mix::commandset::project + punk::overlay::import_commandset project . ::punk::mix::commandset::project + punk::overlay::import_commandset projects . ::punk::mix::commandset::project::collection + } + if {[catch { + package require punk::mix::commandset::layout + punk::overlay::import_commandset project.layout . ::punk::mix::commandset::layout + punk::overlay::import_commandset project.layouts . ::punk::mix::commandset::layout::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::layout" + puts stderr $errM + } + if {[catch { + package require punk::mix::commandset::buildsuite + punk::overlay::import_commandset buildsuite . ::punk::mix::commandset::buildsuite + punk::overlay::import_commandset buildsuites . ::punk::mix::commandset::buildsuite::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::buildsuite" + puts stderr $errM + } + punk::overlay::import_commandset scriptwrap . ::punk::mix::commandset::scriptwrap + if {[catch { + package require punk::mix::commandset::doc + punk::overlay::import_commandset doc . ::punk::mix::commandset::doc + punk::overlay::import_commandset "" "" ::punk::mix::commandset::doc::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::doc" + puts stderr $errM + } + + + proc help {args} { + #set basehelp [punk::mix::base::help -extension [namespace current] {*}$args] + set basehelp [punk::mix::base help {*}$args] + #puts stdout "punk::mix help" + return $basehelp + } + + proc stat {{workingdir ""} args} { + dict set args -v 0 + punk::mix::cli::lib::get_status $workingdir {*}$args + } + proc status {{workingdir ""} args} { + dict set args -v 1 + punk::mix::cli::lib::get_status $workingdir {*}$args + } + + + + + + + +} + + +namespace eval punk::mix::cli { + + + #interp alias {} ::punk::mix::cli::project.new {} ::punk::mix::cli::new + + + proc make {args} { + set startdir [pwd] + set project_base "" ;#empty for unknown + if {[punk::repo::is_git $startdir]} { + set project_base [punk::repo::find_git] + set sourcefolder $project_base/src + } elseif {[punk::repo::is_fossil $startdir]} { + set project_base [punk::repo::find_fossil] + set sourcefolder $project_base/src + } else { + if {[punk::repo::is_candidate $startdir]} { + set project_base [punk::repo::find_candidate] + set sourcefolder $project_base/src + puts stderr "WARNING - project not under git or fossil control" + puts stderr "Using base folder $project_base" + } else { + set sourcefolder $startdir + } + } + + #review - why can't we be anywhere in the project? + #also - if no make.tcl - can we use the running shell's make.tcl ? (after prompting user?) + if {([file tail $sourcefolder] ne "src") || (![file exists $sourcefolder/make.tcl])} { + puts stderr "dev make must be run from src folder containing make.tcl - unable to proceed (cwd: [pwd])" + if {[string length $project_base]} { + if {[file exists $project_base/src] && [string tolower [pwd]] ne [string tolower $project_base/src]} { + puts stderr "Try cd to $project_base/src" + } + } else { + if {[file exists $startdir/Makefile]} { + puts stdout "A Makefile exists at $startdir/Makefile." + if {"windows" eq $::tcl_platform(platform)} { + puts stdout "Try running: msys2 -ucrt64 -here -c \"make build\" or bash -c \"make build\"" + } else { + puts stdout "Try runing: make build" + } + } + } + return false + } + + if {![string length $project_base]} { + puts stderr "WARNING no git or fossil repository detected." + puts stderr "Using base folder $startdir" + set project_base $startdir + } + + set lc_this_exe [string tolower [info nameofexecutable]] + set lc_proj_bin [string tolower $project_base/bin] + set lc_build_bin [string tolower $project_base/src/_build] + + if {"project" in $args} { + set is_own_exe 0 + if {[string match "${lc_proj_bin}*" $lc_this_exe] || [string match "${lc_build_bin}" $lc_this_exe]} { + set is_own_exe 1 + puts stderr "WARNING - running make using executable that may be created by the project being built" + set answer [util::askuser "Do you want to proceed using this executable? (build will probably stop when it is unable to update the executable) Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + } + } + cd $sourcefolder + #use run so that stdout visible as it goes + if {![catch {run --timeout=55000 -debug [info nameofexecutable] $sourcefolder/make.tcl {*}$args} exitinfo]} { + #todo - notify if exit because of timeout! + puts stderr "exitinfo: $exitinfo" + set exitcode [dict get $exitinfo exitcode] + } else { + puts stderr "Error unable to determine exitcode. err: $exitinfo" + cd $startdir + return false + } + + cd $startdir + if {$exitcode != 0} { + puts stderr "FAILED with exitcode $exitcode" + return false + } else { + puts stdout "OK make finished " + return true + } + } + + proc Kettle {args} { + tailcall lib::kettle_call lib {*}$args + } + proc KettleShell {args} { + tailcall lib::kettle_call shell {*}$args + } + + + + namespace eval lib { + namespace path ::punk::mix::util + + + proc module_types {} { + #first in list is default for unspecified -type when creating new module + #return [list plain tarjar zipkit] + return [list plain tarjar zip] + } + + proc validate_modulename {modulename args} { + set opts [list\ + -errorprefix validate_modulename\ + ] + if {[llength $args] %2 != 0} {error "validate_modulename args must be name-value pairs: received '$args'"} + foreach {k v} $args { + switch -- $k { + -errorprefix { + dict set opts $k $v + } + default { + error "validate_modulename error: unknown option '$k'. known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + + validate_name_not_empty_or_spaced $modulename -errorprefix $opt_errorprefix + set testname [string map {:: {}} $modulename] + if {[string first : $testname] >=0} { + error "$opt_errorprefix '$modulename' can only contain paired colons" + } + set badchars [list - "$" "?" "*"] + foreach bc $badchars { + if {[string first $bc $modulename] >= 0} { + error "$opt_errorprefix '$modulename' can not contain character '$bc'" + } + } + return $modulename + } + + proc validate_projectname {projectname args} { + set defaults [list\ + -errorprefix projectname\ + ] + if {[llength $args] %2 != 0} {error "validate_modulename args must be name-value pairs: received '$args'"} + set known_opts [dict keys $defaults] + foreach k [dict keys $args] { + if {$k ni $known_opts} { + error "validate_modulename error: unknown option $k. known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + validate_name_not_empty_or_spaced $projectname -errorprefix $opt_errorprefix + set reserved_words [list etc lib bin modules src doc vendorlib vendormodules embedded runtime _aside _build] + if {$projectname in $reserved_words } { + error "$opt_errorprefix '$projectname' cannot be one of reserved_words: $reserved_words" + } + if {[string first "::" $projectname] >= 0} { + error "$opt_errorprefix '$projectname' cannot contain namespace separator '::'" + } + return $projectname + } + proc validate_name_not_empty_or_spaced {name args} { + set opts [list\ + -errorprefix projectname\ + ] + if {[llength $args] %2 != 0} {error "validate_name_not_empty_or_spaced args must be name-value pairs: received '$args'"} + foreach {k v} $args { + switch -- $k { + -errorprefix { + dict set opts $k $v + } + default { + error "validate_name_not_empty_or_spaced error: unknown option $k. known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + if {![string length $name]} { + error "$opt_errorprefix cannot be empty" + } + if {[string length [string map [list " " "" \n "" \r "" \t ""] $name]] != [string length $name]} { + error "$opt_errorprefix cannot contain whitespace" + } + return $name + } + + #split modulename (as present in a filename or namespaced name) into name/version ignoring leading namespace path + #ignore trailing .tm .TM if present + #if version doesn't pass validation - treat it as part of the modulename and return empty version string without error + #Up to caller to validate. + proc split_modulename_version {modulename} { + set lastpart [namespace tail $modulename] + set lastpart [file tail $lastpart] ;# should be ok to use file tail now that we've ensured no namespace components + if {[string equal -nocase [file extension $modulename] ".tm"]} { + set fileparts [split [file rootname $lastpart] -] + } else { + set fileparts [split $lastpart -] + } + if {[punk::mix::util::is_valid_tm_version [lindex $fileparts end]]} { + set versionsegment [lindex $fileparts end] + set namesegment [join [lrange $fileparts 0 end-1] -];#re-stitch + } else { + # + set namesegment [join $fileparts -] + set versionsegment "" + } + return [list $namesegment $versionsegment] + } + + proc get_status {{workingdir ""} args} { + set result "" + if {$workingdir ne ""} { + if {[file pathtype $workingdir] ne "absolute"} { + set workingdir [file normalize $workingdir] + } + set active_dir $workingdir + } else { + set active_dir [pwd] + } + set defaults [dict create\ + -v 1\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- + set opt_v [dict get $opts -v] + # -- --- --- --- --- --- --- --- --- + + + set repopaths [punk::repo::find_repos [pwd]] + set repos [dict get $repopaths repos] + if {![llength $repos]} { + append result [dict get $repopaths warnings] + } else { + append result [dict get $repopaths warnings] + lassign [lindex $repos 0] repopath repotypes + if {"fossil" in $repotypes} { + #review - multiple process launches to fossil a bit slow on windows.. + #could we query global db in one go instead? + # + set fossil_prog [auto_execok fossil] + append result "FOSSIL project based at $repopath with revision: [punk::repo::fossil_revision $repopath]" \n + set fosinfo [exec {*}$fossil_prog info] + append result [join [punk::repo::grep {repository:*} $fosinfo] \n] \n + + set fosrem [exec {*}$fossil_prog remote ls] + if {[string length $fosrem]} { + append result "Remotes:\n" + append result " " $fosrem \n + } + + + append result [join [punk::repo::grep {tags:*} $fosinfo] \n] \n + + set dbinfo [exec {*}$fossil_prog dbstat] + append result [join [punk::repo::grep {project-name:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {tickets:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {project-age:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {latest-change:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {files:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {check-ins:*} $dbinfo] \n] \n + if {"project" in $repotypes} { + #punk project + if {![catch {package require textblock; package require patternpunk}]} { + set result [textblock::join -- [>punk . logo] " " $result] + append result \n + } + } + + set timeline [exec fossil timeline -n 5 -t ci] + set timeline [string map {\r\n \n} $timeline] + append result $timeline + if {$opt_v} { + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes fossil] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + + } + #repotypes *could* be both git and fossil - so report both if so + if {"git" in $repotypes} { + append result "GIT project based at $repopath with revision: [punk::repo::git_revision $repopath]" \n + if {[string length [set git_prog [auto_execok git]]]} { + set git_remotes [exec {*}$git_prog remote -v] + append result $git_remotes + if {$opt_v} { + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes git] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + } + } + + } + + return $result + } + + + proc build_modules_from_source_to_base {srcdir basedir args} { + set antidir [list "#*" "_build" "_aside" ".git" ".fossil*"] ;#exact or glob patterns for folders (at any level) we don't want to search in or copy. + set defaults [list\ + -installer punk::mix::cli::build_modules_from_source_to_base\ + -call-depth-internal 0\ + -max_depth 1000\ + -subdirlist {}\ + -punkcheck_eventobj "\uFFFF"\ + -glob *.tm\ + -podglob #modpod-*\ + ] + set opts [dict merge $defaults $args] + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set installername [dict get $opts -installer] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set CALLDEPTH [dict get $opts -call-depth-internal] + set max_depth [dict get $opts -max_depth] + set subdirlist [dict get $opts -subdirlist] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set fileglob [dict get $opts -glob] + set podglob [dict get $opts -podglob] + if {![string match "*.tm" $fileglob]} { + error "build_modules_from_source_to_base -glob '$fileglob' doesn't seem to target tcl modules." + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_punkcheck_eventobj [dict get $opts -punkcheck_eventobj] + + set magicversion [punk::mix::util::magic_tm_version] ;#deliberately large so given load-preference when testing + set module_list [list] + + if {[file tail [file dirname $srcdir]] ne "src"} { + puts stderr "ERROR build_modules_from_source_to_base can only be called with a srcdir that is a subfolder of your 'src' directory" + puts stderr "The .tm modules are namespaced based on their directory depth - so we need to start at the root" + puts stderr "To build a subtree of your modules - use an appropriate src/modules folder and pass in the -subdirlist." + puts stderr "e.g if your modules are based at /x/src/modules2 and you wish to build only the .tm files at /x/src/modules2/skunkworks/lib" + puts stderr "Use: >build_modules_from_source_to_base /x/src/modules2 /x/modules2 -subdirlist {skunkworks lib}" + exit 2 + } + set srcdirname [file tail $srcdir] + + set build [file dirname $srcdir]/_build/$srcdirname ;#relative to *original* srcdir - not current_source_dir + if {[llength $subdirlist] == 0} { + set target_module_dir $basedir + set current_source_dir $srcdir + } else { + set target_module_dir $basedir/[file join {*}$subdirlist] + set current_source_dir $srcdir/[file join {*}$subdirlist] + } + if {![file exists $target_module_dir]} { + error "build_modules_from_source_to_base from current source dir: '$current_source_dir'. Basedir:'$current_module_dir' doesn't exist or is empty" + } + if {![file exists $current_source_dir]} { + error "build_modules_from_source_to_base from current source dir:'$current_source_dir' doesn't exist or is empty" + } + + #---------------------------------------- + set punkcheck_file [file join $basedir/.punkcheck] + if {$CALLDEPTH == 0} { + + set config [dict create\ + -glob $fileglob\ + -max_depth 0\ + ] + #lassign [punkcheck::start_installer_event $punkcheck_file $installername $srcdir $basedir $config] _eventid punkcheck_eventid _recordset record_list + # -- --- + set installer [punkcheck::installtrack new $installername $punkcheck_file] + $installer set_source_target $srcdir $basedir + set event [$installer start_event $config] + # -- --- + + } else { + set event $opt_punkcheck_eventobj + } + #---------------------------------------- + + + set process_modules [dict create] + #put pods first in processing order + set src_pods [glob -nocomplain -dir $current_source_dir -type d -tail $podglob] + foreach podpath $src_pods { + dict set process_modules $podpath [dict create -type pod] + } + set src_modules [glob -nocomplain -dir $current_source_dir -type f -tail $fileglob] + foreach modulepath $src_modules { + dict set process_modules $modulepath [dict create -type file] + } + + set did_skip 0 ;#flag for stdout/stderr formatting only + dict for {modpath modinfo} $process_modules { + set modtype [dict get $modinfo -type] + + set is_interesting 0 + if {[string match "foobar" $current_source_dir]} { + set is_interesting 1 + } + if {$is_interesting} { + puts "build_modules_from_source_to_base >>> module $current_source_dir/$modpath" + } + set fileparts [split [file rootname $modpath] -] + #set tmfile_versionsegment [lindex $fileparts end] + lassign [split_modulename_version $modpath] basename tmfile_versionsegment + if {$tmfile_versionsegment eq ""} { + #split_modulename_version version part will be empty if not valid tcl version + #last segment doesn't look even slightly versiony - fail. + puts stderr "ERROR: Unable to confirm file $current_source_dir/$modpath is a reasonably versioned .tm module - ABORTING." + exit 1 + } + switch -- $modtype { + pod { + #basename still contains leading #modpod- + if {[string match #modpod-* $basename]} { + set basename [string range $basename 8 end] + } else { + error "build_modules_from_source_to_base, pod, unexpected basename $basename" ;#shouldn't be possible with default podglob - review - why is podglob configurable? + } + set versionfile $current_source_dir/$basename-buildversion.txt ;#needs to be added in targetset_addsource to trigger rebuild if changed (only when magicversion in use) + if {$tmfile_versionsegment eq $magicversion} { + set versionfiledata "" + if {![file exists $versionfile]} { + puts stderr "\nWARNING: Missing buildversion text file: $versionfile" + puts stderr "Using version 0.1 - create $versionfile containing the desired version number as the top line to avoid this warning\n" + set module_build_version "0.1" + } else { + set fd [open $versionfile r] + set versionfiledata [read $fd]; close $fd + set ln0 [lindex [split $versionfiledata \n] 0] + set ln0 [string trim $ln0]; set ln0 [string trim $ln0 \r] + if {![util::is_valid_tm_version $ln0]} { + puts stderr "ERROR: build version '$ln0' specified in $versionfile is not suitable. Please ensure a proper version number is at first line of file" + exit 3 + } + set module_build_version $ln0 + } + } else { + set module_build_version $tmfile_versionsegment + } + + set buildfolder $current_source_dir/_build + file mkdir $buildfolder + # -- --- + set config [dict create\ + -glob *\ + -max_depth 100\ + ] + # -max_depth -1 for no limit + set build_installername pods_in_$current_source_dir + set build_installer [punkcheck::installtrack new $build_installername $buildfolder/.punkcheck] + $build_installer set_source_target $current_source_dir/$modpath $buildfolder + set build_event [$build_installer start_event $config] + # -- --- + set podtree_copy $buildfolder/#modpod-$basename-$module_build_version + set modulefile $buildfolder/$basename-$module_build_version.tm + + + $build_event targetset_init INSTALL $podtree_copy + $build_event targetset_addsource $current_source_dir/$modpath + if {$tmfile_versionsegment eq $magicversion} { + $build_event targetset_addsource $versionfile + } + if {\ + [llength [dict get [$build_event targetset_source_changes] changed]]\ + || [llength [$build_event get_targets_exist]] < [llength [$build_event get_targets]]\ + } { + $build_event targetset_started + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + + set delete_failed 0 + if {[file exists $buildfolder/]} { + puts stderr "deleting existing _build copy at $podtree_copy" + if {[catch { + file delete -force $podtree_copy + } errMsg]} { + puts stderr "[punk::ansi::a+ red]deletion of _build copy at $podtree_copy failed: $errMsg[punk::ansi::a]" + set delete_failed 1 + } + } + if {!$delete_failed} { + puts stdout "copying.." + puts stdout "$current_source_dir/$modpath" + puts stdout "to:" + puts stdout "$podtree_copy" + file copy $current_source_dir/$modpath $podtree_copy + if {$tmfile_versionsegment eq $magicversion} { + set tmfile $buildfolder/#modpod-$basename-$module_build_version/$basename-$magicversion.tm + if {[file exists $tmfile]} { + set newname $buildfolder/#modpod-$basename-$module_build_version/$basename-$module_build_version.tm + file rename $tmfile $newname + set tmfile $newname + } + set fd [open $tmfile r]; fconfigure $fd -translation binary; set data [read $fd]; close $fd + set data [string map [list $magicversion $module_build_version] $data] + set fdout [open $tmfile w] + fconfigure $fdout -translation binary + puts -nonewline $fdout $data + close $fdout + } + #delete and regenerate zip and modpod stubbed zip + set had_error 0 + set notes [list] + if {[catch { + file delete $buildfolder/$basename-$module_build_version.zip + } err] } { + set had_error 1 + lappend notes "zip_delete_failed" + } + if {[catch { + file delete $buildfolder/$basename-$module_build_version.tm + } err]} { + set had_error 1 + lappend notes "tm_delete_failed" + } + #create ordinary zip file without using external executable + package require punk::zip + set zipfile $buildfolder/$basename-$module_build_version.zip ;#ordinary zip file (deflate) + + if 0 { + #use -base $buildfolder so that -directory is included in the archive - the modpod stub relies on this - and extraction would be potentially messy otherwise + punk::zip::mkzip -base $buildfolder -directory $buildfolder/#modpod-$basename-$module_build_version -- $zipfile * + #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files + } + #zipfs mkzip does exactly what we need anyway in this case + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + + + if {$had_error} { + $build_event targetset_end FAILED -note [join $notes ,] + } else { + # -- ---------- + $build_event targetset_end OK + # -- ---------- + } + } else { + $build_event targetset_end FAILED -note "could not delete $podtree_copy" + } + + } else { + puts -nonewline stderr "." + set did_skip 1 + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $build_event targetset_end SKIPPED + } + $build_event destroy + $build_installer destroy + + $event targetset_init INSTALL $target_module_dir/$basename-$module_build_version.tm + $event targetset_addsource $modulefile + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + $event targetset_started + # -- --- --- --- --- --- + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + lappend module_list $modulefile + file copy -force $modulefile $target_module_dir + puts stderr "Copied zip modpod module $modulefile to $target_module_dir" + # -- --- --- --- --- --- + $event targetset_end OK -note "zip modpod" + } else { + puts -nonewline stderr "." + set did_skip 1 + if {$is_interesting} { + puts stderr "$modulefile [$event targetset_source_changes]" + } + $event targetset_end SKIPPED + } + } + tarjar { + #basename may still contain #tarjar- + #to be obsoleted - update modpod to (optionally) use vfs::tar + } + file { + set m $modpath + if {$tmfile_versionsegment eq $magicversion} { + #set basename [join [lrange $fileparts 0 end-1] -] + set versionfile $current_source_dir/$basename-buildversion.txt + set versionfiledata "" + if {![file exists $versionfile]} { + puts stderr "\nWARNING: Missing buildversion text file: $versionfile" + puts stderr "Using version 0.1 - create $versionfile containing the desired version number as the top line to avoid this warning\n" + set module_build_version "0.1" + } else { + set fd [open $versionfile r] + set versionfiledata [read $fd]; close $fd + set ln0 [lindex [split $versionfiledata \n] 0] + set ln0 [string trim $ln0]; set ln0 [string trim $ln0 \r] + if {![util::is_valid_tm_version $ln0]} { + puts stderr "ERROR: build version '$ln0' specified in $versionfile is not suitable. Please ensure a proper version number is at first line of file" + exit 3 + } + set module_build_version $ln0 + } + + + if {[file exists $current_source_dir/#tarjar-$basename-$magicversion]} { + #rebuild the .tm from the #tarjar + + if {[file exists $current_source_dir/#tarjar-$basename-$magicversion/DESCRIPTION.txt]} { + + } else { + + } + #REVIEW - should be in same structure/depth as $target_module_dir in _build? + + #TODO + set buildfolder $current_sourcedir/_build + file mkdir $buildfolder + + set tmfile $buildfolder/$basename-$module_build_version.tm + file delete -force $buildfolder/#tarjar-$basename-$module_build_version + file delete -force $tmfile + + + file copy -force $current_source_dir/#tarjar-$basename-$magicversion $buildfolder/#tarjar-$basename-$module_build_version + # + #bsdtar doesn't seem to work.. or I haven't worked out the right options? + #exec tar -cvf $buildfolder/$basename-$module_build_version.tm $buildfolder/#tarjar-$basename-$module_build_version + package require tar + tar::create $tmfile $buildfolder/#tarjar-$basename-$module_build_version + if {![file exists $tmfile]} { + puts stdout "ERROR: failed to build tarjar file $tmfile" + exit 4 + } + #copy the file? + #set target $target_module_dir/$basename-$module_build_version.tm + #file copy -force $tmfile $target + + lappend module_list $tmfile + } else { + #assume that either the .tm is not a tarjar - or the tarjar dir is capped (trailing #) and the .tm has been manually tarred. + if {[file exists $current_source_dir/#tarjar-$basename-${magicversion}#]} { + puts stderr "\nWarning: found 'capped' folder #tarjar-$basename-${magicversion}# - No attempt being made to update version in description.txt" + } + + #------------------------------ + # + #set target_relpath [punkcheck::lib::path_relative $basedir $target_module_dir/$basename-$module_build_version.tm] + #set file_record [punkcheck::installfile_begin $basedir $target_relpath $installername -eventid $punkcheck_eventid] + $event targetset_init INSTALL $target_module_dir/$basename-$module_build_version.tm + $event targetset_addsource $versionfile + $event targetset_addsource $current_source_dir/$m + + #set changed_list [list] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $versionfile] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $current_source_dir/$m] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + #set changed_list [dict get $changed_unchanged changed] + + + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + #set file_record [punkcheck::installfile_started_install $basedir $file_record] + $event targetset_started + # -- --- --- --- --- --- + set target $target_module_dir/$basename-$module_build_version.tm + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + puts stdout "copying module $current_source_dir/$m to $target as version: $module_build_version ([file tail $target])" + set fd [open $current_source_dir/$m r]; fconfigure $fd -translation binary; set data [read $fd]; close $fd + set data [string map [list $magicversion $module_build_version] $data] + set fdout [open $target w] + fconfigure $fdout -translation binary + puts -nonewline $fdout $data + close $fdout + #file copy -force $srcdir/$m $target + lappend module_list $target + # -- --- --- --- --- --- + #set file_record [punkcheck::installfile_finished_install $basedir $file_record] + $event targetset_end OK + } else { + if {$is_interesting} { + puts stdout "skipping module $current_source_dir/$m - no change in sources detected" + } + puts -nonewline stderr "." + set did_skip 1 + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $event targetset_end SKIPPED + } + + #------------------------------ + + } + + continue + } + ##------------------------------ + ## + #set target_relpath [punkcheck::lib::path_relative $basedir $target_module_dir/$m] + #set file_record [punkcheck::installfile_begin $basedir $target_relpath $installername -eventid $punkcheck_eventid] + #set changed_list [list] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $current_source_dir/$m] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + #set changed_list [dict get $changed_unchanged changed] + #---------- + $event targetset_init INSTALL $target_module_dir/$m + $event targetset_addsource $current_source_dir/$m + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + #set file_record [punkcheck::installfile_started_install $basedir $file_record] + $event targetset_started + # -- --- --- --- --- --- + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + lappend module_list $current_source_dir/$m + file copy -force $current_source_dir/$m $target_module_dir + puts stderr "Copied already versioned module $current_source_dir/$m to $target_module_dir" + # -- --- --- --- --- --- + #set file_record [punkcheck::installfile_finished_install $basedir $file_record] + $event targetset_end OK -note "already versioned module" + } else { + puts -nonewline stderr "." + set did_skip 1 + if {$is_interesting} { + puts stderr "$current_source_dir/$m [$event targetset_source_changes]" + } + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $event targetset_end SKIPPED + } + } + } + } ;#end dict for {modpath modinfo} $process_modules + + + if {$CALLDEPTH >= $max_depth} { + set subdirs [list] + } else { + set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *] + } + #puts stderr "subdirs: $subdirs" + foreach d $subdirs { + set skipdir 0 + foreach dg $antidir { + if {[string match $dg $d]} { + set skipdir 1 + continue + } + } + if {$skipdir} { + continue + } + if {![file exists $target_module_dir/$d]} { + file mkdir $target_module_dir/$d + } + lappend module_list {*}[build_modules_from_source_to_base $srcdir $basedir\ + -call-depth-internal [expr {$CALLDEPTH +1}]\ + -subdirlist [list {*}$subdirlist $d]\ + -punkcheck_eventobj $event\ + -glob $fileglob\ + -podglob $podglob\ + ] + } + if {$did_skip} { + puts -nonewline stdout \n + } + if {$CALLDEPTH == 0} { + $event destroy + $installer destroy + } + return $module_list + } + + variable kettle_reset_bodies [dict create] + variable kettle_reset_args [dict create] + #We are abusing kettle to run in-process. + # when we change to another project we need recipes to be reloaded. + # Kettle rewrites some of it's own procs - stopping reloading of recipes when we change folders + #kettle_init stores the original proc bodies & args + proc kettle_init {} { + variable kettle_reset_bodies ;#dict + variable kettle_reset_args + set reset_procs [list\ + ::kettle::benchmarks\ + ::kettle::doc\ + ::kettle::figures\ + ::kettle::meta::scan\ + ::kettle::testsuite\ + ] + foreach p $reset_procs { + set b [info body $p] + if {[string match "*Overwrite self*" $b]} { + dict set kettle_reset_bodies $p $b + set argnames [info args $p] + set arglist [list] + foreach a $argnames { + if {[info default $p $a dval]} { + lappend arglist [list $a $dval] + } else { + lappend arglist $a + } + } + dict set kettle_reset_args $p $arglist + } + } + + } + #call kettle_reinit to ensure recipes point to current project + proc kettle_reinit {} { + variable kettle_reset_bodies + variable kettle_reset_args + dict for {p b} $kettle_reset_bodies { + #set b [dict get $kettle_reset_bodies $p] + set argl [dict get $kettle_reset_args $p] + uplevel 1 [list ::proc $p $argl $b] + } + #todo - determine standard recipes by examining standard.tcl instead of hard coding? + set standard_recipes [list\ + null\ + forever\ + list-recipes\ + help-recipes\ + help-dump\ + help-recipes\ + help\ + list\ + list-options\ + help-options\ + show-configuration\ + show-state\ + show\ + meta-status\ + gui\ + ] + #set ::kettle::recipe::recipe [dict create] + dict for {r -} $::kettle::recipe::recipe { + if {$r ni $standard_recipes} { + dict unset ::kettle::recipe::recipe $r + } + } + } + proc kettle_call {calltype args} { + variable kettle_reset_bodies + switch -- $calltype { + lib {} + shell { + set kettleappfile [file dirname [info nameofexecutable]]/kettle + set kettlebatfile [file dirname [info nameofexecutable]]/kettle.bat + + if {(![file exists $kettleappfile]) && (![file exists $kettlebatfile])} { + error "deck kettle_call unable to find installed kettle application file '$kettleappfile' (or '$kettlebatfile' if on windows)" + } + if {[file exists $kettleappfile]} { + set kettlescript $kettleappfile + } + if {$::tcl_platform(platform) eq "windows"} { + if {[file exists $kettlebatfile]} { + set kettlescript $kettlebatfile + } + } + } + default { + error "deck kettle_call 1st argument must be one of: 'lib' for direct use of kettle module or 'shell' to call as separate process" + } + } + set startdir [pwd] + if {![file exists $startdir/build.tcl]} { + error "deck kettle must be run from a folder containing build.tcl (cwd: [pwd])" + } + if {[package provide kettle] eq ""} { + puts stdout "Loading kettle package - may be delay on first load ..." + package require kettle + kettle_init ;#store original procs for those kettle procs that rewrite themselves + } else { + if {[dict size $kettle_reset_bodies] == 0} { + #presumably package require kettle was called without calling our kettle_init hack. + kettle_init + } else { + #undo proc rewrites + kettle_reinit + } + } + set first [lindex $args 0] + if {[string match @* $first]} { + error "deck kettle doesn't support special operations - try calling tclsh kettle directly" + } + if {$first eq "-f"} { + set args [lassign $args __ path] + } else { + set path $startdir/build.tcl + } + set opts [list] + + if {[lindex $args 0] eq "-trace"} { + set args [lrange $args 1 end] + lappend opts --verbose on + } + set goals [list] + + if {$calltype eq "lib"} { + file mkdir ~/.kettle + set dotfile ~/.kettle/config + if {[file exists $dotfile] && + [file isfile $dotfile] && + [file readable $dotfile]} { + ::kettle io trace {Loading dotfile $dotfile ...} + set args [list {*}[::kettle path cat $dotfile] {*}$args] + } + } + + #hardcoded kettle option names (::kettle option names) - retrieved using kettle::option names + #This is done so we don't have to load kettle lib for shell call (both loading as module and running shell are annoyingly SLOW) + #REVIEW - needs to be updated to keep in sync with kettle. + set knownopts [list\ + --exec-prefix --bin-dir --lib-dir --prefix --man-dir --html-dir --markdown-dir --include-dir \ + --ignore-glob --dry --verbose --machine --color --state --config --with-shell --log \ + --log-append --log-mode --with-dia --constraints --file --limitconstraints --tmatch --notfile --single --valgrind --tskip --repeats \ + --iters --collate --match --rmatch --with-doc-destination --with-git --target --test-include \ + ] + + while {[llength $args]} { + set o [lindex $args 0] + switch -glob -- $o { + --* { + #instead of using: kettle option known + if {$o ni $knownopts} { + error "Unable to process unknown option $o." {} [list KETTLE (deck)] + } + lappend opts $o [lindex $args 1] + #::kettle::option set $o [lindex $args 1] + set args [lrange $args 2 end] + } + default { + lappend goals $o + set args [lrange $args 1 end] + } + } + } + + if {![llength $goals]} { + lappend goals help + } + if {"--prefix" ni [dict keys $opts]} { + dict set opts --prefix [file dirname $startdir] + } + if {$calltype eq "lib"} { + ::kettle status clear + ::kettle::option::set @kettle $startdir + foreach {o v} $opts { + ::kettle option set $o $v + } + ::kettle option set @srcscript $path + ::kettle option set @srcdir [file dirname $path] + ::kettle option set @goals $goals + #load standard recipes as listed in build.tcl + ::source $path + puts stderr "recipes: [::kettle recipe names]" + ::kettle recipe run {*}[::kettle option get @goals] + + set state [::kettle option get --state] + if {$state ne {}} { + puts stderr "saving kettle state: $state" + ::kettle status save $state + } + + } else { + #shell + puts stdout "Running external kettle process with args: $opts $goals" + run -n tclsh $kettlescript -f $path {*}$opts {*}$goals + } + + } + proc kettle_punk_recipes {} { + set txtdst ... + } + + } +} + + +namespace eval punk::mix::cli { + proc _cli {args} { + #don't use tailcall - base uses info level to determine caller + ::punk::mix::base::_cli {*}$args + } + variable default_command help + package require punk::mix::base + package require punk::overlay + if {[catch { + punk::overlay::custom_from_base [namespace current] ::punk::mix::base + } errM]} { + puts stderr "punk::mix::cli load error: Failed to overlay punk::mix::base $errM" + error "punk::mix::cli error: $errM" + } +} + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::cli [namespace eval punk::mix::cli { + variable version + set version 0.3.1 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.tm new file mode 100644 index 00000000..263ccc96 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/cli-0.3.tm @@ -0,0 +1,1128 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'deck make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::cli 0.3 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::repo +package require punk::ansi +package require punkcheck ;#checksum and/or timestamp records + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#review +#deck - rename to dev +namespace eval punk::mix::cli { + namespace eval temp_import { + } + namespace ensemble create + + package require punk::overlay + catch { + punk::overlay::import_commandset module . ::punk::mix::commandset::module + } + punk::overlay::import_commandset debug . ::punk::mix::commandset::debug + punk::overlay::import_commandset repo . ::punk::mix::commandset::repo + punk::overlay::import_commandset lib . ::punk::mix::commandset::loadedlib + + catch { + package require punk::mix::commandset::project + punk::overlay::import_commandset project . ::punk::mix::commandset::project + punk::overlay::import_commandset projects . ::punk::mix::commandset::project::collection + } + if {[catch { + package require punk::mix::commandset::layout + punk::overlay::import_commandset project.layout . ::punk::mix::commandset::layout + punk::overlay::import_commandset project.layouts . ::punk::mix::commandset::layout::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::layout" + puts stderr $errM + } + if {[catch { + package require punk::mix::commandset::buildsuite + punk::overlay::import_commandset buildsuite . ::punk::mix::commandset::buildsuite + punk::overlay::import_commandset buildsuites . ::punk::mix::commandset::buildsuite::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::buildsuite" + puts stderr $errM + } + punk::overlay::import_commandset scriptwrap . ::punk::mix::commandset::scriptwrap + if {[catch { + package require punk::mix::commandset::doc + punk::overlay::import_commandset doc . ::punk::mix::commandset::doc + punk::overlay::import_commandset "" "" ::punk::mix::commandset::doc::collection + } errM]} { + puts stderr "error loading punk::mix::commandset::doc" + puts stderr $errM + } + + + proc help {args} { + #set basehelp [punk::mix::base::help -extension [namespace current] {*}$args] + set basehelp [punk::mix::base help {*}$args] + #puts stdout "punk::mix help" + return $basehelp + } + + proc stat {{workingdir ""} args} { + dict set args -v 0 + punk::mix::cli::lib::get_status $workingdir {*}$args + } + proc status {{workingdir ""} args} { + dict set args -v 1 + punk::mix::cli::lib::get_status $workingdir {*}$args + } + + + + + + + +} + + +namespace eval punk::mix::cli { + + + #interp alias {} ::punk::mix::cli::project.new {} ::punk::mix::cli::new + + + proc make {args} { + set startdir [pwd] + set project_base "" ;#empty for unknown + if {[punk::repo::is_git $startdir]} { + set project_base [punk::repo::find_git] + set sourcefolder $project_base/src + } elseif {[punk::repo::is_fossil $startdir]} { + set project_base [punk::repo::find_fossil] + set sourcefolder $project_base/src + } else { + if {[punk::repo::is_candidate $startdir]} { + set project_base [punk::repo::find_candidate] + set sourcefolder $project_base/src + puts stderr "WARNING - project not under git or fossil control" + puts stderr "Using base folder $project_base" + } else { + set sourcefolder $startdir + } + } + + #review - why can't we be anywhere in the project? + #also - if no make.tcl - can we use the running shell's make.tcl ? (after prompting user?) + if {([file tail $sourcefolder] ne "src") || (![file exists $sourcefolder/make.tcl])} { + puts stderr "dev make must be run from src folder containing make.tcl - unable to proceed (cwd: [pwd])" + if {[string length $project_base]} { + if {[file exists $project_base/src] && [string tolower [pwd]] ne [string tolower $project_base/src]} { + puts stderr "Try cd to $project_base/src" + } + } else { + if {[file exists $startdir/Makefile]} { + puts stdout "A Makefile exists at $startdir/Makefile." + if {"windows" eq $::tcl_platform(platform)} { + puts stdout "Try running: msys2 -ucrt64 -here -c \"make build\" or bash -c \"make build\"" + } else { + puts stdout "Try runing: make build" + } + } + } + return false + } + + if {![string length $project_base]} { + puts stderr "WARNING no git or fossil repository detected." + puts stderr "Using base folder $startdir" + set project_base $startdir + } + + set lc_this_exe [string tolower [info nameofexecutable]] + set lc_proj_bin [string tolower $project_base/bin] + set lc_build_bin [string tolower $project_base/src/_build] + + if {"project" in $args} { + set is_own_exe 0 + if {[string match "${lc_proj_bin}*" $lc_this_exe] || [string match "${lc_build_bin}" $lc_this_exe]} { + set is_own_exe 1 + puts stderr "WARNING - running make using executable that may be created by the project being built" + set answer [util::askuser "Do you want to proceed using this executable? (build will probably stop when it is unable to update the executable) Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + } + } + cd $sourcefolder + #use run so that stdout visible as it goes + if {![catch {run --timeout=55000 -debug [info nameofexecutable] $sourcefolder/make.tcl {*}$args} exitinfo]} { + #todo - notify if exit because of timeout! + puts stderr "exitinfo: $exitinfo" + set exitcode [dict get $exitinfo exitcode] + } else { + puts stderr "Error unable to determine exitcode. err: $exitinfo" + cd $startdir + return false + } + + cd $startdir + if {$exitcode != 0} { + puts stderr "FAILED with exitcode $exitcode" + return false + } else { + puts stdout "OK make finished " + return true + } + } + + proc Kettle {args} { + tailcall lib::kettle_call lib {*}$args + } + proc KettleShell {args} { + tailcall lib::kettle_call shell {*}$args + } + + + + namespace eval lib { + namespace path ::punk::mix::util + + + proc module_types {} { + #first in list is default for unspecified -type when creating new module + #return [list plain tarjar zipkit] + return [list plain tarjar zip] + } + + proc validate_modulename {modulename args} { + set opts [list\ + -errorprefix validate_modulename\ + ] + if {[llength $args] %2 != 0} {error "validate_modulename args must be name-value pairs: received '$args'"} + foreach {k v} $args { + switch -- $k { + -errorprefix { + dict set opts $k $v + } + default { + error "validate_modulename error: unknown option '$k'. known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + + validate_name_not_empty_or_spaced $modulename -errorprefix $opt_errorprefix + set testname [string map {:: {}} $modulename] + if {[string first : $testname] >=0} { + error "$opt_errorprefix '$modulename' can only contain paired colons" + } + set badchars [list - "$" "?" "*"] + foreach bc $badchars { + if {[string first $bc $modulename] >= 0} { + error "$opt_errorprefix '$modulename' can not contain character '$bc'" + } + } + return $modulename + } + + proc validate_projectname {projectname args} { + set defaults [list\ + -errorprefix projectname\ + ] + if {[llength $args] %2 != 0} {error "validate_modulename args must be name-value pairs: received '$args'"} + set known_opts [dict keys $defaults] + foreach k [dict keys $args] { + if {$k ni $known_opts} { + error "validate_modulename error: unknown option $k. known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + validate_name_not_empty_or_spaced $projectname -errorprefix $opt_errorprefix + set reserved_words [list etc lib bin modules src doc vendorlib vendormodules embedded runtime _aside _build] + if {$projectname in $reserved_words } { + error "$opt_errorprefix '$projectname' cannot be one of reserved_words: $reserved_words" + } + if {[string first "::" $projectname] >= 0} { + error "$opt_errorprefix '$projectname' cannot contain namespace separator '::'" + } + return $projectname + } + proc validate_name_not_empty_or_spaced {name args} { + set opts [list\ + -errorprefix projectname\ + ] + if {[llength $args] %2 != 0} {error "validate_name_not_empty_or_spaced args must be name-value pairs: received '$args'"} + foreach {k v} $args { + switch -- $k { + -errorprefix { + dict set opts $k $v + } + default { + error "validate_name_not_empty_or_spaced error: unknown option $k. known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_errorprefix [dict get $opts -errorprefix] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + if {![string length $name]} { + error "$opt_errorprefix cannot be empty" + } + if {[string length [string map [list " " "" \n "" \r "" \t ""] $name]] != [string length $name]} { + error "$opt_errorprefix cannot contain whitespace" + } + return $name + } + + #split modulename (as present in a filename or namespaced name) into name/version ignoring leading namespace path + #ignore trailing .tm .TM if present + #if version doesn't pass validation - treat it as part of the modulename and return empty version string without error + #Up to caller to validate. + proc split_modulename_version {modulename} { + set lastpart [namespace tail $modulename] + set lastpart [file tail $lastpart] ;# should be ok to use file tail now that we've ensured no namespace components + if {[string equal -nocase [file extension $modulename] ".tm"]} { + set fileparts [split [file rootname $lastpart] -] + } else { + set fileparts [split $lastpart -] + } + if {[punk::mix::util::is_valid_tm_version [lindex $fileparts end]]} { + set versionsegment [lindex $fileparts end] + set namesegment [join [lrange $fileparts 0 end-1] -];#re-stitch + } else { + # + set namesegment [join $fileparts -] + set versionsegment "" + } + return [list $namesegment $versionsegment] + } + + proc get_status {{workingdir ""} args} { + set result "" + if {$workingdir ne ""} { + if {[file pathtype $workingdir] ne "absolute"} { + set workingdir [file normalize $workingdir] + } + set active_dir $workingdir + } else { + set active_dir [pwd] + } + set defaults [dict create\ + -v 1\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- + set opt_v [dict get $opts -v] + # -- --- --- --- --- --- --- --- --- + + + set repopaths [punk::repo::find_repos [pwd]] + set repos [dict get $repopaths repos] + if {![llength $repos]} { + append result [dict get $repopaths warnings] + } else { + append result [dict get $repopaths warnings] + lassign [lindex $repos 0] repopath repotypes + if {"fossil" in $repotypes} { + #review - multiple process launches to fossil a bit slow on windows.. + #could we query global db in one go instead? + # + set fossil_prog [auto_execok fossil] + append result "FOSSIL project based at $repopath with revision: [punk::repo::fossil_revision $repopath]" \n + set fosinfo [exec {*}$fossil_prog info] + append result [join [punk::repo::grep {repository:*} $fosinfo] \n] \n + + set fosrem [exec {*}$fossil_prog remote ls] + if {[string length $fosrem]} { + append result "Remotes:\n" + append result " " $fosrem \n + } + + + append result [join [punk::repo::grep {tags:*} $fosinfo] \n] \n + + set dbinfo [exec {*}$fossil_prog dbstat] + append result [join [punk::repo::grep {project-name:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {tickets:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {project-age:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {latest-change:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {files:*} $dbinfo] \n] \n + append result [join [punk::repo::grep {check-ins:*} $dbinfo] \n] \n + if {"project" in $repotypes} { + #punk project + if {![catch {package require textblock; package require patternpunk}]} { + set result [textblock::join -- [>punk . logo] " " $result] + append result \n + } + } + + set timeline [exec fossil timeline -n 5 -t ci] + set timeline [string map {\r\n \n} $timeline] + append result $timeline + if {$opt_v} { + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes fossil] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + + } + #repotypes *could* be both git and fossil - so report both if so + if {"git" in $repotypes} { + append result "GIT project based at $repopath with revision: [punk::repo::git_revision $repopath]" \n + if {[string length [set git_prog [auto_execok git]]]} { + set git_remotes [exec {*}$git_prog remote -v] + append result $git_remotes + if {$opt_v} { + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes git] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + } + } + + } + + return $result + } + + + proc build_modules_from_source_to_base {srcdir basedir args} { + set antidir [list "#*" "_build" "_aside" ".git" ".fossil*"] ;#exact or glob patterns for folders (at any level) we don't want to search in or copy. + set defaults [list\ + -installer punk::mix::cli::build_modules_from_source_to_base\ + -call-depth-internal 0\ + -max_depth 1000\ + -subdirlist {}\ + -punkcheck_eventobj "\uFFFF"\ + -glob *.tm\ + -podglob #modpod-*\ + ] + set opts [dict merge $defaults $args] + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set installername [dict get $opts -installer] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set CALLDEPTH [dict get $opts -call-depth-internal] + set max_depth [dict get $opts -max_depth] + set subdirlist [dict get $opts -subdirlist] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set fileglob [dict get $opts -glob] + set podglob [dict get $opts -podglob] + if {![string match "*.tm" $fileglob]} { + error "build_modules_from_source_to_base -glob '$fileglob' doesn't seem to target tcl modules." + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_punkcheck_eventobj [dict get $opts -punkcheck_eventobj] + + set magicversion [punk::mix::util::magic_tm_version] ;#deliberately large so given load-preference when testing + set module_list [list] + + if {[file tail [file dirname $srcdir]] ne "src"} { + puts stderr "ERROR build_modules_from_source_to_base can only be called with a srcdir that is a subfolder of your 'src' directory" + puts stderr "The .tm modules are namespaced based on their directory depth - so we need to start at the root" + puts stderr "To build a subtree of your modules - use an appropriate src/modules folder and pass in the -subdirlist." + puts stderr "e.g if your modules are based at /x/src/modules2 and you wish to build only the .tm files at /x/src/modules2/skunkworks/lib" + puts stderr "Use: >build_modules_from_source_to_base /x/src/modules2 /x/modules2 -subdirlist {skunkworks lib}" + exit 2 + } + set srcdirname [file tail $srcdir] + + set build [file dirname $srcdir]/_build/$srcdirname ;#relative to *original* srcdir - not current_source_dir + if {[llength $subdirlist] == 0} { + set target_module_dir $basedir + set current_source_dir $srcdir + } else { + set target_module_dir $basedir/[file join {*}$subdirlist] + set current_source_dir $srcdir/[file join {*}$subdirlist] + } + if {![file exists $target_module_dir]} { + error "build_modules_from_source_to_base from current source dir: '$current_source_dir'. Basedir:'$current_module_dir' doesn't exist or is empty" + } + if {![file exists $current_source_dir]} { + error "build_modules_from_source_to_base from current source dir:'$current_source_dir' doesn't exist or is empty" + } + + #---------------------------------------- + set punkcheck_file [file join $basedir/.punkcheck] + if {$CALLDEPTH == 0} { + + set config [dict create\ + -glob $fileglob\ + -max_depth 0\ + ] + #lassign [punkcheck::start_installer_event $punkcheck_file $installername $srcdir $basedir $config] _eventid punkcheck_eventid _recordset record_list + # -- --- + set installer [punkcheck::installtrack new $installername $punkcheck_file] + $installer set_source_target $srcdir $basedir + set event [$installer start_event $config] + # -- --- + + } else { + set event $opt_punkcheck_eventobj + } + #---------------------------------------- + + + set process_modules [dict create] + #put pods first in processing order + set src_pods [glob -nocomplain -dir $current_source_dir -type d -tail $podglob] + foreach podpath $src_pods { + dict set process_modules $podpath [dict create -type pod] + } + set src_modules [glob -nocomplain -dir $current_source_dir -type f -tail $fileglob] + foreach modulepath $src_modules { + dict set process_modules $modulepath [dict create -type file] + } + + set did_skip 0 ;#flag for stdout/stderr formatting only + dict for {modpath modinfo} $process_modules { + set modtype [dict get $modinfo -type] + + set is_interesting 0 + if {[string match "foobar" $current_source_dir]} { + set is_interesting 1 + } + if {$is_interesting} { + puts "build_modules_from_source_to_base >>> module $current_source_dir/$modpath" + } + set fileparts [split [file rootname $modpath] -] + #set tmfile_versionsegment [lindex $fileparts end] + lassign [split_modulename_version $modpath] basename tmfile_versionsegment + if {$tmfile_versionsegment eq ""} { + #split_modulename_version version part will be empty if not valid tcl version + #last segment doesn't look even slightly versiony - fail. + puts stderr "ERROR: Unable to confirm file $current_source_dir/$modpath is a reasonably versioned .tm module - ABORTING." + exit 1 + } + switch -- $modtype { + pod { + #basename still contains leading #modpod- + if {[string match #modpod-* $basename]} { + set basename [string range $basename 8 end] + } else { + error "build_modules_from_source_to_base, pod, unexpected basename $basename" ;#shouldn't be possible with default podglob - review - why is podglob configurable? + } + set versionfile $current_source_dir/$basename-buildversion.txt ;#needs to be added in targetset_addsource to trigger rebuild if changed (only when magicversion in use) + if {$tmfile_versionsegment eq $magicversion} { + set versionfiledata "" + if {![file exists $versionfile]} { + puts stderr "\nWARNING: Missing buildversion text file: $versionfile" + puts stderr "Using version 0.1 - create $versionfile containing the desired version number as the top line to avoid this warning\n" + set module_build_version "0.1" + } else { + set fd [open $versionfile r] + set versionfiledata [read $fd]; close $fd + set ln0 [lindex [split $versionfiledata \n] 0] + set ln0 [string trim $ln0]; set ln0 [string trim $ln0 \r] + if {![util::is_valid_tm_version $ln0]} { + puts stderr "ERROR: build version '$ln0' specified in $versionfile is not suitable. Please ensure a proper version number is at first line of file" + exit 3 + } + set module_build_version $ln0 + } + } else { + set module_build_version $tmfile_versionsegment + } + + set buildfolder $current_source_dir/_build + file mkdir $buildfolder + # -- --- + set config [dict create\ + -glob *\ + -max_depth 100\ + ] + # -max_depth -1 for no limit + set build_installername pods_in_$current_source_dir + set build_installer [punkcheck::installtrack new $build_installername $buildfolder/.punkcheck] + $build_installer set_source_target $current_source_dir/$modpath $buildfolder + set build_event [$build_installer start_event $config] + # -- --- + set podtree_copy $buildfolder/#modpod-$basename-$module_build_version + set modulefile $buildfolder/$basename-$module_build_version.tm + + + $build_event targetset_init INSTALL $podtree_copy + $build_event targetset_addsource $current_source_dir/$modpath + if {$tmfile_versionsegment eq $magicversion} { + $build_event targetset_addsource $versionfile + } + if {\ + [llength [dict get [$build_event targetset_source_changes] changed]]\ + || [llength [$build_event get_targets_exist]] < [llength [$build_event get_targets]]\ + } { + $build_event targetset_started + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + + set delete_failed 0 + if {[file exists $buildfolder/]} { + puts stderr "deleting existing _build copy at $podtree_copy" + if {[catch { + file delete -force $podtree_copy + } errMsg]} { + puts stderr "[punk::ansi::a+ red]deletion of _build copy at $podtree_copy failed: $errMsg[punk::ansi::a]" + set delete_failed 1 + } + } + if {!$delete_failed} { + puts stdout "copying.." + puts stdout "$current_source_dir/$modpath" + puts stdout "to:" + puts stdout "$podtree_copy" + file copy $current_source_dir/$modpath $podtree_copy + if {$tmfile_versionsegment eq $magicversion} { + set tmfile $buildfolder/#modpod-$basename-$module_build_version/$basename-$magicversion.tm + if {[file exists $tmfile]} { + set newname $buildfolder/#modpod-$basename-$module_build_version/$basename-$module_build_version.tm + file rename $tmfile $newname + set tmfile $newname + } + set fd [open $tmfile r]; fconfigure $fd -translation binary; set data [read $fd]; close $fd + set data [string map [list $magicversion $module_build_version] $data] + set fdout [open $tmfile w] + fconfigure $fdout -translation binary + puts -nonewline $fdout $data + close $fdout + } + #delete and regenerate zip and modpod stubbed zip + set had_error 0 + set notes [list] + if {[catch { + file delete $buildfolder/$basename-$module_build_version.zip + } err] } { + set had_error 1 + lappend notes "zip_delete_failed" + } + if {[catch { + file delete $buildfolder/$basename-$module_build_version.tm + } err]} { + set had_error 1 + lappend notes "tm_delete_failed" + } + #create ordinary zip file without using external executable + package require punk::zip + set zipfile $buildfolder/$basename-$module_build_version.zip ;#ordinary zip file (deflate) + + if 0 { + #use -base $buildfolder so that -directory is included in the archive - the modpod stub relies on this - and extraction would be potentially messy otherwise + punk::zip::mkzip -base $buildfolder -directory $buildfolder/#modpod-$basename-$module_build_version -- $zipfile * + #punk::zip::mkzip stores permissions - (unix style) which confuses zipfs when reading - it misidentifies dirs as files + } + #zipfs mkzip does exactly what we need anyway in this case + set wd [pwd] + cd $buildfolder + puts "zipfs mkzip $zipfile #modpod-$basename-$module_build_version" + zipfs mkzip $zipfile #modpod-$basename-$module_build_version + cd $wd + + package require modpod + modpod::lib::make_zip_modpod $zipfile $modulefile + + + if {$had_error} { + $build_event targetset_end FAILED -note [join $notes ,] + } else { + # -- ---------- + $build_event targetset_end OK + # -- ---------- + } + } else { + $build_event targetset_end FAILED -note "could not delete $podtree_copy" + } + + } else { + puts -nonewline stderr "." + set did_skip 1 + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $build_event targetset_end SKIPPED + } + $build_event destroy + $build_installer destroy + + $event targetset_init INSTALL $target_module_dir/$basename-$module_build_version.tm + $event targetset_addsource $modulefile + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + $event targetset_started + # -- --- --- --- --- --- + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + lappend module_list $modulefile + file copy -force $modulefile $target_module_dir + puts stderr "Copied zip modpod module $modulefile to $target_module_dir" + # -- --- --- --- --- --- + $event targetset_end OK -note "zip modpod" + } else { + puts -nonewline stderr "." + set did_skip 1 + if {$is_interesting} { + puts stderr "$modulefile [$event targetset_source_changes]" + } + $event targetset_end SKIPPED + } + } + tarjar { + #basename may still contain #tarjar- + #to be obsoleted - update modpod to (optionally) use vfs::tar + } + file { + set m $modpath + if {$tmfile_versionsegment eq $magicversion} { + #set basename [join [lrange $fileparts 0 end-1] -] + set versionfile $current_source_dir/$basename-buildversion.txt + set versionfiledata "" + if {![file exists $versionfile]} { + puts stderr "\nWARNING: Missing buildversion text file: $versionfile" + puts stderr "Using version 0.1 - create $versionfile containing the desired version number as the top line to avoid this warning\n" + set module_build_version "0.1" + } else { + set fd [open $versionfile r] + set versionfiledata [read $fd]; close $fd + set ln0 [lindex [split $versionfiledata \n] 0] + set ln0 [string trim $ln0]; set ln0 [string trim $ln0 \r] + if {![util::is_valid_tm_version $ln0]} { + puts stderr "ERROR: build version '$ln0' specified in $versionfile is not suitable. Please ensure a proper version number is at first line of file" + exit 3 + } + set module_build_version $ln0 + } + + + if {[file exists $current_source_dir/#tarjar-$basename-$magicversion]} { + #rebuild the .tm from the #tarjar + + if {[file exists $current_source_dir/#tarjar-$basename-$magicversion/DESCRIPTION.txt]} { + + } else { + + } + #REVIEW - should be in same structure/depth as $target_module_dir in _build? + + #TODO + set buildfolder $current_sourcedir/_build + file mkdir $buildfolder + + set tmfile $buildfolder/$basename-$module_build_version.tm + file delete -force $buildfolder/#tarjar-$basename-$module_build_version + file delete -force $tmfile + + + file copy -force $current_source_dir/#tarjar-$basename-$magicversion $buildfolder/#tarjar-$basename-$module_build_version + # + #bsdtar doesn't seem to work.. or I haven't worked out the right options? + #exec tar -cvf $buildfolder/$basename-$module_build_version.tm $buildfolder/#tarjar-$basename-$module_build_version + package require tar + tar::create $tmfile $buildfolder/#tarjar-$basename-$module_build_version + if {![file exists $tmfile]} { + puts stdout "ERROR: failed to build tarjar file $tmfile" + exit 4 + } + #copy the file? + #set target $target_module_dir/$basename-$module_build_version.tm + #file copy -force $tmfile $target + + lappend module_list $tmfile + } else { + #assume that either the .tm is not a tarjar - or the tarjar dir is capped (trailing #) and the .tm has been manually tarred. + if {[file exists $current_source_dir/#tarjar-$basename-${magicversion}#]} { + puts stderr "\nWarning: found 'capped' folder #tarjar-$basename-${magicversion}# - No attempt being made to update version in description.txt" + } + + #------------------------------ + # + #set target_relpath [punkcheck::lib::path_relative $basedir $target_module_dir/$basename-$module_build_version.tm] + #set file_record [punkcheck::installfile_begin $basedir $target_relpath $installername -eventid $punkcheck_eventid] + $event targetset_init INSTALL $target_module_dir/$basename-$module_build_version.tm + $event targetset_addsource $versionfile + $event targetset_addsource $current_source_dir/$m + + #set changed_list [list] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $versionfile] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $current_source_dir/$m] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + #set changed_list [dict get $changed_unchanged changed] + + + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + #set file_record [punkcheck::installfile_started_install $basedir $file_record] + $event targetset_started + # -- --- --- --- --- --- + set target $target_module_dir/$basename-$module_build_version.tm + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + puts stdout "copying module $current_source_dir/$m to $target as version: $module_build_version ([file tail $target])" + set fd [open $current_source_dir/$m r]; fconfigure $fd -translation binary; set data [read $fd]; close $fd + set data [string map [list $magicversion $module_build_version] $data] + set fdout [open $target w] + fconfigure $fdout -translation binary + puts -nonewline $fdout $data + close $fdout + #file copy -force $srcdir/$m $target + lappend module_list $target + # -- --- --- --- --- --- + #set file_record [punkcheck::installfile_finished_install $basedir $file_record] + $event targetset_end OK + } else { + if {$is_interesting} { + puts stdout "skipping module $current_source_dir/$m - no change in sources detected" + } + puts -nonewline stderr "." + set did_skip 1 + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $event targetset_end SKIPPED + } + + #------------------------------ + + } + + continue + } + ##------------------------------ + ## + #set target_relpath [punkcheck::lib::path_relative $basedir $target_module_dir/$m] + #set file_record [punkcheck::installfile_begin $basedir $target_relpath $installername -eventid $punkcheck_eventid] + #set changed_list [list] + ## -- --- --- --- --- --- + #set source_relpath [punkcheck::lib::path_relative $basedir $current_source_dir/$m] + #set file_record [punkcheck::installfile_add_source_and_fetch_metadata $basedir $source_relpath $file_record] + ## -- --- --- --- --- --- + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + #set changed_list [dict get $changed_unchanged changed] + #---------- + $event targetset_init INSTALL $target_module_dir/$m + $event targetset_addsource $current_source_dir/$m + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + + #set file_record [punkcheck::installfile_started_install $basedir $file_record] + $event targetset_started + # -- --- --- --- --- --- + if {$did_skip} {set did_skip 0; puts -nonewline stdout \n} + lappend module_list $current_source_dir/$m + file copy -force $current_source_dir/$m $target_module_dir + puts stderr "Copied already versioned module $current_source_dir/$m to $target_module_dir" + # -- --- --- --- --- --- + #set file_record [punkcheck::installfile_finished_install $basedir $file_record] + $event targetset_end OK -note "already versioned module" + } else { + puts -nonewline stderr "." + set did_skip 1 + if {$is_interesting} { + puts stderr "$current_source_dir/$m [$event targetset_source_changes]" + } + #set file_record [punkcheck::installfile_skipped_install $basedir $file_record] + $event targetset_end SKIPPED + } + } + } + } ;#end dict for {modpath modinfo} $process_modules + + + if {$CALLDEPTH >= $max_depth} { + set subdirs [list] + } else { + set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *] + } + #puts stderr "subdirs: $subdirs" + foreach d $subdirs { + set skipdir 0 + foreach dg $antidir { + if {[string match $dg $d]} { + set skipdir 1 + continue + } + } + if {$skipdir} { + continue + } + if {![file exists $target_module_dir/$d]} { + file mkdir $target_module_dir/$d + } + lappend module_list {*}[build_modules_from_source_to_base $srcdir $basedir\ + -call-depth-internal [expr {$CALLDEPTH +1}]\ + -subdirlist [list {*}$subdirlist $d]\ + -punkcheck_eventobj $event\ + -glob $fileglob\ + -podglob $podglob\ + ] + } + if {$did_skip} { + puts -nonewline stdout \n + } + if {$CALLDEPTH == 0} { + $event destroy + $installer destroy + } + return $module_list + } + + variable kettle_reset_bodies [dict create] + variable kettle_reset_args [dict create] + #We are abusing kettle to run in-process. + # when we change to another project we need recipes to be reloaded. + # Kettle rewrites some of it's own procs - stopping reloading of recipes when we change folders + #kettle_init stores the original proc bodies & args + proc kettle_init {} { + variable kettle_reset_bodies ;#dict + variable kettle_reset_args + set reset_procs [list\ + ::kettle::benchmarks\ + ::kettle::doc\ + ::kettle::figures\ + ::kettle::meta::scan\ + ::kettle::testsuite\ + ] + foreach p $reset_procs { + set b [info body $p] + if {[string match "*Overwrite self*" $b]} { + dict set kettle_reset_bodies $p $b + set argnames [info args $p] + set arglist [list] + foreach a $argnames { + if {[info default $p $a dval]} { + lappend arglist [list $a $dval] + } else { + lappend arglist $a + } + } + dict set kettle_reset_args $p $arglist + } + } + + } + #call kettle_reinit to ensure recipes point to current project + proc kettle_reinit {} { + variable kettle_reset_bodies + variable kettle_reset_args + dict for {p b} $kettle_reset_bodies { + #set b [dict get $kettle_reset_bodies $p] + set argl [dict get $kettle_reset_args $p] + uplevel 1 [list ::proc $p $argl $b] + } + #todo - determine standard recipes by examining standard.tcl instead of hard coding? + set standard_recipes [list\ + null\ + forever\ + list-recipes\ + help-recipes\ + help-dump\ + help-recipes\ + help\ + list\ + list-options\ + help-options\ + show-configuration\ + show-state\ + show\ + meta-status\ + gui\ + ] + #set ::kettle::recipe::recipe [dict create] + dict for {r -} $::kettle::recipe::recipe { + if {$r ni $standard_recipes} { + dict unset ::kettle::recipe::recipe $r + } + } + } + proc kettle_call {calltype args} { + variable kettle_reset_bodies + switch -- $calltype { + lib {} + shell { + set kettleappfile [file dirname [info nameofexecutable]]/kettle + set kettlebatfile [file dirname [info nameofexecutable]]/kettle.bat + + if {(![file exists $kettleappfile]) && (![file exists $kettlebatfile])} { + error "deck kettle_call unable to find installed kettle application file '$kettleappfile' (or '$kettlebatfile' if on windows)" + } + if {[file exists $kettleappfile]} { + set kettlescript $kettleappfile + } + if {$::tcl_platform(platform) eq "windows"} { + if {[file exists $kettlebatfile]} { + set kettlescript $kettlebatfile + } + } + } + default { + error "deck kettle_call 1st argument must be one of: 'lib' for direct use of kettle module or 'shell' to call as separate process" + } + } + set startdir [pwd] + if {![file exists $startdir/build.tcl]} { + error "deck kettle must be run from a folder containing build.tcl (cwd: [pwd])" + } + if {[package provide kettle] eq ""} { + puts stdout "Loading kettle package - may be delay on first load ..." + package require kettle + kettle_init ;#store original procs for those kettle procs that rewrite themselves + } else { + if {[dict size $kettle_reset_bodies] == 0} { + #presumably package require kettle was called without calling our kettle_init hack. + kettle_init + } else { + #undo proc rewrites + kettle_reinit + } + } + set first [lindex $args 0] + if {[string match @* $first]} { + error "deck kettle doesn't support special operations - try calling tclsh kettle directly" + } + if {$first eq "-f"} { + set args [lassign $args __ path] + } else { + set path $startdir/build.tcl + } + set opts [list] + + if {[lindex $args 0] eq "-trace"} { + set args [lrange $args 1 end] + lappend opts --verbose on + } + set goals [list] + + if {$calltype eq "lib"} { + file mkdir ~/.kettle + set dotfile ~/.kettle/config + if {[file exists $dotfile] && + [file isfile $dotfile] && + [file readable $dotfile]} { + ::kettle io trace {Loading dotfile $dotfile ...} + set args [list {*}[::kettle path cat $dotfile] {*}$args] + } + } + + #hardcoded kettle option names (::kettle option names) - retrieved using kettle::option names + #This is done so we don't have to load kettle lib for shell call (both loading as module and running shell are annoyingly SLOW) + #REVIEW - needs to be updated to keep in sync with kettle. + set knownopts [list\ + --exec-prefix --bin-dir --lib-dir --prefix --man-dir --html-dir --markdown-dir --include-dir \ + --ignore-glob --dry --verbose --machine --color --state --config --with-shell --log \ + --log-append --log-mode --with-dia --constraints --file --limitconstraints --tmatch --notfile --single --valgrind --tskip --repeats \ + --iters --collate --match --rmatch --with-doc-destination --with-git --target --test-include \ + ] + + while {[llength $args]} { + set o [lindex $args 0] + switch -glob -- $o { + --* { + #instead of using: kettle option known + if {$o ni $knownopts} { + error "Unable to process unknown option $o." {} [list KETTLE (deck)] + } + lappend opts $o [lindex $args 1] + #::kettle::option set $o [lindex $args 1] + set args [lrange $args 2 end] + } + default { + lappend goals $o + set args [lrange $args 1 end] + } + } + } + + if {![llength $goals]} { + lappend goals help + } + if {"--prefix" ni [dict keys $opts]} { + dict set opts --prefix [file dirname $startdir] + } + if {$calltype eq "lib"} { + ::kettle status clear + ::kettle::option::set @kettle $startdir + foreach {o v} $opts { + ::kettle option set $o $v + } + ::kettle option set @srcscript $path + ::kettle option set @srcdir [file dirname $path] + ::kettle option set @goals $goals + #load standard recipes as listed in build.tcl + ::source $path + puts stderr "recipes: [::kettle recipe names]" + ::kettle recipe run {*}[::kettle option get @goals] + + set state [::kettle option get --state] + if {$state ne {}} { + puts stderr "saving kettle state: $state" + ::kettle status save $state + } + + } else { + #shell + puts stdout "Running external kettle process with args: $opts $goals" + run -n tclsh $kettlescript -f $path {*}$opts {*}$goals + } + + } + proc kettle_punk_recipes {} { + set txtdst ... + } + + } +} + + +namespace eval punk::mix::cli { + proc _cli {args} { + #don't use tailcall - base uses info level to determine caller + ::punk::mix::base::_cli {*}$args + } + variable default_command help + package require punk::mix::base + package require punk::overlay + if {[catch { + punk::overlay::custom_from_base [namespace current] ::punk::mix::base + } errM]} { + puts stderr "punk::mix::cli load error: Failed to overlay punk::mix::base $errM" + error "punk::mix::cli error: $errM" + } +} + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::cli [namespace eval punk::mix::cli { + variable version + set version 0.3 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/buildsuite-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/buildsuite-0.1.0.tm new file mode 100644 index 00000000..883e02d2 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/buildsuite-0.1.0.tm @@ -0,0 +1,152 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::buildsuite 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::buildsuite { + namespace export * + proc projects {suite} { + set pathinfo [punk::repo::find_repos [pwd]] + set projectdir [dict get $pathinfo closest] + set suites_dir [file join $projectdir src buildsuites] + if {![file isdirectory [file join $suites_dir $suite]]} { + puts stderr "suite: $suite not found in buildsuites folder: $suites_dir" + return + } + set suite_dir [file join $suites_dir $suite] + set projects [glob -dir $suite_dir -type d -tails *] + + #use internal du which although breadth-first is generally faster + puts stdout "Examining source folders in $suite_dir." ;#A hint that something is happening in case sources are large + set du_info [punk::du::du -d 1 -b $suite_dir] + set du_sizes [dict create] + set suite_total_size "-" + foreach du_record $du_info { + if {[llength $du_record] != 2} { + #sanity precaution - punk::du::du should always output list of 2 element lists - at least with flags we're using + continue + } + set sz [lindex $du_record 0] + set path_parts [file split [lindex $du_record 1]] ;#should handle spaced-paths ok. + set s [lindex $path_parts end-1] + set p [lindex $path_parts end] + + #This handles case where a project folder is same name as suite e.g src/buildsuites/tcl/tcl + #so we can't just use tail as dict key. We could assume last record is always total - but + if {![string match -nocase $s $suite]} { + if {$s eq "buildsuites" && [string match -nocase $p $suite]} { + set suite_total_size $sz ;#this includes config files in suite base - so we don't really want to use this to report the total source size + } else { + #something else - shouldn't happen + puts stderr "Unexpected output from du in suite_dir: $suite_dir" + puts stderr "$du_record" + #try to continue anyway + } + } else { + dict set du_sizes $p $sz + } + } + + #build another dict for sizes where we ensure exactly one entry for each project exists and exclude total (don't blindly trust du output e.g in case weird filename/permission issue) + set psizes [list] + foreach p $projects { + if {[dict exists $du_sizes $p]} { + dict set psizes $p [dict get $du_sizes $p] + } else { + dict set psizes $p - + } + } + set total_source_size "-" + if {[catch { + set total_source_size [tcl::mathop::+ {*}[dict values $psizes]] + } errM]} { + puts stderr "Failed to calculate total source size. Errmsg: $errM" + } + package require overtype + + set title1 "Projects" + set widest1 [tcl::mathfunc::max {*}[lmap v [concat [list $title1] $projects] {punk::strlen $v}]] + set col1 [string repeat " " $widest1] + + set size_values [dict values $psizes] + # Title is probably widest - but go through the process anyway! + set title2 "Source Bytes" + set widest2 [tcl::mathfunc::max {*}[lmap v [concat [list $title2] $size_values] {punk::strlen $v}]] + set col2 [string repeat " " $widest2] + + + set output "" + append output "[overtype::left $col1 $title1] [overtype::right $col2 $title2]" \n + foreach p [lsort $projects] { + #todo - provide some basic info for each - last build time? last time-to-build? + append output "[overtype::left $col1 $p] [overtype::right $col2 [dict get $psizes $p]]" \n + } + append output "Total Source size: $total_source_size bytes" \n + return $output + } + + + namespace eval collection { + namespace export * + proc _default {{glob {}}} { + if {![string length $glob]} { + set glob * + } + #todo - review - we want the furthest not the closest if we are potentially inside a buildsuite project + set pathinfo [punk::repo::find_repos [pwd]] + set projectdir [dict get $pathinfo closest] + set suites_dir [file join $projectdir src buildsuites] + if {![file exists $suites_dir]} { + puts stderr "No buildsuites folder found at $suites_dir" + return + } + set suites [lsort [glob -dir $suites_dir -type d -tails *]] + if {$glob ne "*"} { + set suites [lsearch -all -inline $suites $glob] + } + return $suites + } + } + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::buildsuite [namespace eval punk::mix::commandset::buildsuite { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/debug-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/debug-0.1.0.tm new file mode 100644 index 00000000..c6c83b69 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/debug-0.1.0.tm @@ -0,0 +1,92 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::debug 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::debug { + namespace export get paths + namespace path ::punk::mix::cli + + #Except for 'get' - all debug commands should emit to stdout + proc paths {} { + set out "" + puts stdout "find_repos output:" + set pathinfo [punk::repo::find_repos [pwd]] + pdict pathinfo + + set projectdir [dict get $pathinfo closest] + set modulefolders [lib::find_source_module_paths $projectdir] + puts stdout "modulefolders: $modulefolders" + + set template_base_dict [punk::mix::base::lib::get_template_basefolders] + puts stdout "get_template_basefolders output:" + pdict template_base_dict */* + return + } + + #call other debug command - but capture stdout as return value + proc get {args} { + set nm [lindex $args 0] + if {$nm eq ""} { + set nscmds [info commands [namespace current]::*] + set cmds [lmap v $nscmds {namespace tail $v}] + error "debug.get missing debug command argument. Try one of: $cmds" + return + } + set nextargs [lrange $args 1 end] + set out "" + if {[info commands [namespace current]::$nm] ne ""} { + append out [runout -n -tcl [namespace current]::$nm {*}$nextargs] \n + } else { + set nscmds [info commands [namespace current]::*] + set cmds [lmap v $nscmds {namespace tail $v}] + error "debug.get invalid debug command '$nm' Try one of: $cmds" + } + return $out + } + namespace eval lib { + + } + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::debug [namespace eval punk::mix::commandset::debug { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/doc-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/doc-0.1.0.tm new file mode 100644 index 00000000..d3b0585c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/doc-0.1.0.tm @@ -0,0 +1,290 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::doc 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +package require punk::path ;# for treefilenames, relative +package require punk::repo +package require punk::docgen ;#inline doctools - generate doctools .man files at src/docgen prior to using kettle to producing .html .md etc +package require punk::mix::cli ;#punk::mix::cli::lib used for kettle_call +#package require punk::mix::util ;#for path_relative + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::doc { + namespace export * + + proc _default {} { + puts "documentation subsystem" + puts "commands: doc.build" + puts " build documentation from src/doc to src/embedded using the kettle build tool" + puts "commands: doc.status" + } + + proc build {} { + puts "build docs" + set projectdir [punk::repo::find_project] + if {$projectdir eq ""} { + puts stderr "No current project dir - unable to build docs" + return + } + #user may delete the comment containing "--- punk::docgen::overwrites" and then manually edit, and we won't overwrite + #we still generate output in src/docgen so user can diff and manually update if thats what they prefer + set oldfiles [punk::path::treefilenames -dir $projectdir/src/doc _module_*.man] + foreach maybedoomed $oldfiles { + set fd [open $maybedoomed r] + set data [read $fd] + close $fd + if {[string match "*--- punk::docgen overwrites *" $data]} { + file delete -force $maybedoomed + } + } + set generated [lib::do_docgen modules] + if {[dict get $generated count] > 0} { + #review + set doclist [dict get $generated docs] + set source_base [dict get $generated base] + set target_base $projectdir/src/doc + foreach dinfo $doclist { + lassign $dinfo module fpath + set relpath [punk::path::relative $source_base $fpath] + set relfolder [file dirname $relpath] + if {$relfolder eq "."} { + set relfolder "" + } + file mkdir [file join $target_base $relfolder] + set target [file join $target_base $relfolder _module_[file tail $fpath]] + puts stderr "target --> $target" + if {![file exists $target]} { + file copy $fpath $target + } + } + } + + if {[file exists $projectdir/src/doc]} { + set original_wd [pwd] + cd $projectdir/src + #---------- + set installer [punkcheck::installtrack new project.new $projectdir/src/.punkcheck] + $installer set_source_target $projectdir/src/doc $projectdir/src/embedded + set event [$installer start_event {-install_step kettledoc}] + #use same virtual id "kettle_build_doc" as project.new - review best way to keep identifiers like this in sync. + $event targetset_init VIRTUAL kettle_build_doc ;#VIRTUAL - since there is no specific target file - and we don't know all the files that will be generated + $event targetset_addsource $projectdir/src/doc ;#whole doc tree is considered the source + #---------- + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "BUILDING DOCS at $projectdir/src/embedded from src/doc" + if {[catch { + if {"::meta" eq [info commands ::meta]} { + puts stderr "There appears to be a leftover ::meta command which is presumed to be from doctools. Destroying object" + ::meta destroy + } + punk::mix::cli::lib::kettle_call lib doc + #Kettle doc + + } errM]} { + $event targetset_end FAILED -note "kettle_build_doc failed: $errM" + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "No change detected in src/doc" + $event targetset_end SKIPPED + } + $event end + $event destroy + $installer destroy + cd $original_wd + } else { + puts stderr "No doc folder found at $projectdir/src/doc" + } + } + proc status {} { + set projectdir [punk::repo::find_project] + if {$projectdir eq ""} { + puts stderr "No current project dir - unable to check doc status" + return + } + if {![file exists $projectdir/src/doc]} { + set result "No documentation source found. Expected .man files in doctools format at $projectdir/src/doc" + return $result + } + set original_wd [pwd] + cd $projectdir/src + puts stdout "Testing status of doctools source location $projectdir/src/doc ..." + flush stdout + #---------- + set installer [punkcheck::installtrack new project.new $projectdir/src/.punkcheck] + $installer set_source_target $projectdir/src/doc $projectdir/src/embedded + set event [$installer start_event {-install_step kettledoc}] + #use same virtual id "kettle_build_doc" as project.new - review best way to keep identifiers like this in sync. + $event targetset_init QUERY kettle_build_doc ;#usually VIRTUAL - since there is no specific target file - and we don't know all the files that will be generated - but here we use QUERY to ensure no writes to .punkcheck + set last_completion [$event targetset_last_complete] + + if {[llength $last_completion]} { + #adding a source causes it to be checksummed + $event targetset_addsource $projectdir/src/doc ;#whole doc tree is considered the source + #---------- + set changeinfo [$event targetset_source_changes] + if {\ + [llength [dict get $changeinfo changed]]\ + } { + puts stdout "changed" + puts stdout $changeinfo + } else { + puts stdout "No changes detected in $projectdir/src/doc tree" + } + } else { + #no previous completion-record for this target - must assume changed - no need to trigger checksumming + puts stdout "No existing record of doc build in .punkcheck. Assume it needs to be rebuilt." + } + + + $event destroy + $installer destroy + + cd $original_wd + } + proc validate {} { + #todo - run and validate punk::docgen output + set projectdir [punk::repo::find_project] + if {$projectdir eq ""} { + puts stderr "No current project dir - unable to check doc status" + return + } + if {![file exists $projectdir/src/doc]} { + set result "No documentation source found. Expected .man files in doctools format at $projectdir/src/doc" + return $result + } + set original_wd [pwd] + set docroot $projectdir/src/doc + cd $docroot + + dtplite validate $docroot + + #punk::mix::cli::lib::kettle_call lib validate-doc + + cd $original_wd + } + + namespace eval collection { + variable pkg + set pkg punk::mix::commandset::doc + + namespace export * + namespace path [namespace parent] + + } + + namespace eval lib { + variable pkg + set pkg punk::mix::commandset::doc + proc do_docgen {{project_subpath modules}} { + #Extract doctools comments from source code + set projectdir [punk::repo::find_project] + set output_base [file join $projectdir src docgen] + set codesource_path [file join $projectdir $project_subpath] + if {![file isdirectory $codesource_path]} { + puts stderr "WARNING punk::mix::commandset::doc unable to find codesource_path $codesource_path during do_docgen - skipping inline doctools generation" + return + } + if {[file isdirectory $output_base]} { + if {[catch { + file delete -force $output_base + }]} { + error "do_docgen failed to delete existing output base folder: $output_base" + } + } + file mkdir $output_base + + set matched_paths [punk::path::treefilenames -dir $codesource_path -antiglob_paths {**/mix/templates/** **/project_layouts/** **/decktemplates/** **/_aside **/_aside/**} *.tm] + set count 0 + set newdocs [list] + set docgen_header_comments "" + append docgen_header_comments {[comment {--- punk::docgen generated from inline doctools comments ---}]} \n + append docgen_header_comments {[comment {--- punk::docgen DO NOT EDIT DOCS HERE UNLESS YOU REMOVE THESE COMMENT LINES ---}]} \n + append docgen_header_comments {[comment {--- punk::docgen overwrites this file ---}]} \n + foreach fullpath $matched_paths { + set doctools [punk::docgen::get_doctools_comments $fullpath] + if {$doctools ne ""} { + set fname [file tail $fullpath] + set mod_tail [file rootname $fname] + set relpath [punk::path::relative $codesource_path [file dirname $fullpath]] + if {$relpath eq "."} { + set relpath "" + } + set tailsegs [file split $relpath] + set module_fullname [join $tailsegs ::]::$mod_tail + set target_docname $fname.man + set this_outdir [file join $output_base $relpath] + + if {[string length $fname] > 99} { + #output needs to be tarballed to do checksum change tests in a reasonably straightforward and not-too-terribly slow way. + #hack - review. Determine exact limit - test if tcllib tar fixed or if it's a limit of the particular tar format + #work around tcllib tar filename length limit ( somewhere around 100?) This seems to be a limit on the length of a particular segment in the path.. not whole path length? + #this case only came up because docgen used to path munge to long filenames - but left because we know there is a limit and renaming fixes it - even if it's ugly - but still allows doc generation. + #review - if we're checking fname - should also test length of whole path and determine limits for tar + package require md5 + set target_docname [md5::md5 -hex $fullpath]_overlongfilename.man + puts stderr "WARNING - overlong file name - renaming $fullpath" + puts stderr " to [file dirname $fullpath]/$target_docname" + } + + file mkdir $this_outdir + puts stdout "saving [string length $doctools] bytes of doctools output from file $relpath/$fname" + set outfile [file join $this_outdir $target_docname] + set fd [open $outfile w] + fconfigure $fd -translation binary + puts -nonewline $fd $docgen_header_comments$doctools + close $fd + incr count + lappend newdocs [list $module_fullname $outfile] + } + } + return [list count $count docs $newdocs base $output_base] + } + + } +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::doc [namespace eval punk::mix::commandset::doc { + variable pkg punk::mix::commandset::doc + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/layout-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/layout-0.1.0.tm new file mode 100644 index 00000000..401ddb72 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/layout-0.1.0.tm @@ -0,0 +1,288 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::layout 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +package require punk::args +#sort of a circular dependency when commandset loaded by punk::mix::cli - that's ok, but this could theoretically be loaded by another cli and with another base +package require punk::mix +package require punk::mix::base + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::layout { + namespace export * + + + #per layout functions + proc files {{layout ""}} { + set argd [punk::args::get_dict { + *values -min 1 -max 1 + layout -type string -minlen 1 + } [list $layout]] + + set allfiles [lib::layout_all_files $layout] + return [join $allfiles \n] + } + proc templatefiles {layout} { + set templatefiles [lib::layout_scan_for_template_files $layout] + return [join $templatefiles \n] + } + proc templatefiles.relative {layout} { + + set layoutdict [lib::layouts_dict] + if {![dict exists $layoutdict $layout]} { + puts stderr "layout '$layout' not found." + return + } + set layoutinfo [dict get $layoutdict $layout] + set layoutfolder [dict get $layoutinfo path] + + + set stripprefix [file normalize $layoutfolder] + set templatefiles [lib::layout_scan_for_template_files $layout] + set tails [list] + foreach templatefullpath $templatefiles { + lappend tails [punk::repo::path_strip_alreadynormalized_prefixdepth $templatefullpath $stripprefix] + } + return [join $tails \n] + } + + #layout collection functions - to be imported with punk::overlay::import_commandset separately + namespace eval collection { + namespace export * + proc _defaultx {{glob {}}} { + if {![string length $glob]} { + set glob * + } + set layouts [list] + set layoutdict [punk::cap::call_handler punk.templates get_itemdict_projectlayouts] + #set tplfolderdict [punk::mix::base::lib::get_template_basefolders] + dict for {layoutname layoutinfo} $layoutdict { + lappend layouts [list $layoutname $layoutinfo] + } + #return [join [lsort -index 0 $layouts] \n] + return [join $layouts \n] + } + + proc _default {args} { + punk::args::get_dict [subst { + *proc -name ::punk::mix::commandset::layout::collection::_default + -startdir -type string + -not -type string -multiple 1 + globsearches -default * -multiple 1 + }] $args + + set tdict_low_to_high [as_dict {*}$args] + #convert to screen order - with higher priority at the top + set tdict [dict create] + foreach k [lreverse [dict keys $tdict_low_to_high]] { + dict set tdict $k [dict get $tdict_low_to_high $k] + } + + package require overtype + package require textblock + #set pathinfolist [dict values $tdict] + #set paths [lsearch -all -inline -index 1 -subindices $pathinfolist *] ;#relies on first key of templates_dict being path + + set names [dict keys $tdict] + set paths [list] + set pathtypes [list] + dict for {nm tinfo} $tdict { + lappend paths [dict get $tinfo path] + lappend pathtypes [dict get $tinfo sourceinfo pathtype] + } + + set title(path) "Path" + set widest(path) [tcl::mathfunc::max {*}[lmap v [concat [list $title(path)] $paths] {punk::strlen $v}]] + set col(path) [string repeat " " $widest(path)] + + set title(pathtype) "[a+ green]Path Type[a]" + set widest(pathtype) [tcl::mathfunc::max {*}[lmap v [concat [list $title(pathtype)] $pathtypes] {punk::strlen $v}]] + set col(pathtype) [string repeat " " $widest(pathtype)] + + set title(name) "Layout Name" + set widest(name) [tcl::mathfunc::max {*}[lmap v [concat [list $title(name)] $names] {punk::strlen $v}]] + set col(name) [string repeat " " $widest(name)] + + set vsep " | " + set vsep_w [string length $vsep] ;#unicode? + set tablewidth [expr {$widest(name) + $vsep_w + $widest(pathtype) + $vsep_w + $widest(path)}] + set table "" + append table [string repeat - $tablewidth] \n + append table "[textblock::join -- [overtype::left $col(name) $title(name)] $vsep [overtype::left $col(pathtype) $title(pathtype)] $vsep [overtype::left $col(path) $title(path)]]" \n + append table [string repeat - $tablewidth] \n + + foreach n $names pt $pathtypes p $paths { + append table "[textblock::join -- [overtype::left $col(name) $n] $vsep [overtype::left $col(pathtype) $pt] $vsep [overtype::left $col(path) $p]]" \n + } + + return $table + } + proc references {args} { + set tdict_low_to_high [references_as_dict {*}$args] + #convert to screen order - with higher priority at the top + set tdict [dict create] + foreach k [lreverse [dict keys $tdict_low_to_high]] { + dict set tdict $k [dict get $tdict_low_to_high $k] + } + + package require overtype + package require textblock + #set pathinfolist [dict values $tdict] + #set paths [lsearch -all -inline -index 1 -subindices $pathinfolist *] ;#relies on first key of templates_dict being path + + set names [dict keys $tdict] + set paths [list] + set pathtypes [list] + dict for {nm tinfo} $tdict { + lappend paths [dict get $tinfo path] + lappend pathtypes [dict get $tinfo sourceinfo pathtype] + } + + set title(path) "Path" + set widest(path) [tcl::mathfunc::max {*}[lmap v [concat [list $title(path)] $paths] {punk::strlen $v}]] + set col(path) [string repeat " " $widest(path)] + + set title(pathtype) "[a+ green]Path Type[a]" + set widest(pathtype) [tcl::mathfunc::max {*}[lmap v [concat [list $title(pathtype)] $pathtypes] {punk::strlen $v}]] + set col(pathtype) [string repeat " " $widest(pathtype)] + + set title(name) "Layout Name" + set widest(name) [tcl::mathfunc::max {*}[lmap v [concat [list $title(name)] $names] {punk::strlen $v}]] + set col(name) [string repeat " " $widest(name)] + + set vsep " | " + set vsep_w [string length $vsep] ;#unicode? + set tablewidth [expr {$widest(name) + $vsep_w + $widest(pathtype) + $vsep_w + $widest(path)}] + set table "" + append table [string repeat - $tablewidth] \n + append table "[textblock::join -- [overtype::left $col(name) $title(name)] $vsep [overtype::left $col(pathtype) $title(pathtype)] $vsep [overtype::left $col(path) $title(path)]]" \n + append table [string repeat - $tablewidth] \n + + foreach n $names pt $pathtypes p $paths { + append table "[textblock::join -- [overtype::left $col(name) $n] $vsep [overtype::left $col(pathtype) $pt] $vsep [overtype::left $col(path) $p]]" \n + } + + return $table + } + + proc as_dict {args} { + tailcall punk::mix::commandset::layout::lib::layouts_dict {*}$args + } + proc references_as_dict {args} { + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + set ref_dict [punk::cap::call_handler punk.templates get_itemdict_projectlayoutrefs {*}$args] + } else { + put stderr "commandset::layout::lib::layouts_dict WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide template locations" + } + return $ref_dict + } + } + namespace eval lib { + proc layouts_dict {args} { + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + set layout_dict [punk::cap::call_handler punk.templates get_itemdict_projectlayouts {*}$args] + } else { + put stderr "commandset::layout::lib::layouts_dict WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide template locations" + } + return $layout_dict + } + + proc layout_all_files {layout} { + #todo - allow versionless layout name to pick highest version found + set layoutdict [layouts_dict] + if {![dict exists $layoutdict $layout]} { + puts stderr "layout '$layout' not found." + return + } + set layoutinfo [dict get $layoutdict $layout] + set layoutfolder [dict get $layoutinfo path] + if {![file isdirectory $layoutfolder]} { + puts stderr "layout '$layout' points to path $layoutfolder - but it doesn't seem to exist" + } + set file_list [list] + util::foreach-file $layoutfolder path { + lappend file_list $path + } + + return $file_list + } + + # + proc layout_scan_for_template_files {layout {tags {}}} { + #todo JMN + set layoutdict [layouts_dict] + if {![dict exists $layoutdict $layout]} { + puts stderr "layout '$layout' not found." + return + } + set layoutinfo [dict get $layoutdict $layout] + set layoutfolder [dict get $layoutinfo path] + + #use last matching layout found. review silent if multiple? + if {![llength $tags]} { + #todo - get standard tags from somewhere + set tagnames [list project] + foreach tn $tagnames { + lappend tags [string cat % $tn %] + } + } + set file_list [list] + util::foreach-file $layoutfolder path { + set fd [open $path r] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + foreach tag $tags { + if {[string match "*$tag*" $data]} { + lappend file_list $path + } + } + } + + return $file_list + } + } + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::layout [namespace eval punk::mix::commandset::layout { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm new file mode 100644 index 00000000..f94bfed0 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/loadedlib-0.1.0.tm @@ -0,0 +1,593 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::loadedlib 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::ns +package require punk::lib + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::loadedlib { + namespace export * + #search automatically wrapped in * * - can contain inner * ? globs + proc search {args} { + set argspecs { + *proc -name "punk::mix::commandset::loadedlib search" -help "search all Tcl libraries available to your local interpreter" + -return -type string -default table -choices {table tableobject list lines} + -present -type integer -default 2 -choices {0 1 2} -choicelabels {absent present both} -help "(unimplemented) Display only those that are 0:absent 1:present 2:both" + -highlight -type boolean -default 1 -help "Highlight which version is present with ansi underline and colour" + -refresh -default 0 -type boolean -help "Re-scan the tm and library folders" + searchstrings -default * -multiple 1 -help "Names to search for, may contain glob chars (* ?) e.g *lib* + If no glob chars are explicitly specified, the searchstring will be wrapped with star globs. + eg name -> *name* + " + } + set argd [punk::args::get_dict $argspecs $args] + set searchstrings [dict get $argd values searchstrings] + set opts [dict get $argd opts] + set opt_return [dict get $opts -return] + set opt_highlight [dict get $opts -highlight] + + catch {package require frobznodule666} ;#ensure pkg system has loaded/searched for everything + if {[catch {package require natsort}]} { + set has_natsort 0 + } else { + set has_natsort 1 + } + set packages [package names] + set matches [list] + foreach search $searchstrings { + if {[regexp {[?*]} $search]} { + #caller has specified specific glob pattern - use it + #todo - respect supplied case only if uppers present? require another flag? + lappend matches {*}[lsearch -all -inline -nocase $packages $search] + } elseif {[string match =* $search]} { + lappend matches {*}[lsearch -all -inline -exact $packages [string range $search 1 end]] + } else { + #make it easy to search for anything + lappend matches {*}[lsearch -all -inline -nocase $packages "*$search*"] + } + } + set matches [lsort -unique $matches][unset matches] + set matchinfo [list] + set highlight_ansi [a+ web-limegreen underline] + set RST [a] + foreach m $matches { + set versions [package versions $m] + if {![llength $versions]} { + #e.g builtins such as zlib - shows no versions - but will show version when package present/provide used + set versions [package provide $m] + #if {![catch {package present $m} v]} { + # set versions $v + #} + } + if {$has_natsort} { + set versions [natsort::sort $versions] + } else { + set versions [lsort $versions] + } + if {$opt_highlight} { + set v [package provide $m] + if {$v ne ""} { + set posn [lsearch $versions $v] + if {$posn >= 0} { + #FIXME! (probably in textblock::pad ?) + #TODO - determine why underline is extended to padding even with double reset. (space or other char required to prevent) + set highlighted "$highlight_ansi$v$RST $RST" + set versions [lreplace $versions $posn $posn $highlighted] + } else { + #shouldn't be possible? + puts stderr "failed to find version '$v' in versions:$versions for package $m" + } + } + } + lappend matchinfo [list $m $versions] + } + switch -- $opt_return { + list { + return $matchinfo + } + lines { + return [join $matchinfo \n] + } + table - tableobject { + set t [textblock::class::table new] + $t add_column -headers "Package" + $t add_column -headers "Version" + $t configure -show_hseps 0 + foreach m $matchinfo { + $t add_row [list [lindex $m 0] [join [lindex $m 1] " "]] + } + if {$opt_return eq "tableobject"} { + return $t + } + set result [$t print] + $t destroy + return $result + } + } + } + proc loaded.search {searchstring} { + set search_result [search $searchstring] + set all_libs [split $search_result \n] + set col1items [list] + set col2items [list] + set col3items [list] + foreach libinfo $all_libs { + if {[string trim $libinfo] eq ""} { + continue + } + set versions [lassign $libinfo libname] + if {[set ver [package provide $libname]] ne ""} { + lappend col1items $libname + lappend col2items $versions + lappend col3items $ver + } + } + + package require overtype + set title1 "Library" + set widest1 [tcl::mathfunc::max {*}[lmap v [concat [list $title1] $col1items] {string length $v}]] + set col1 [string repeat " " $widest1] + set title2 "Versions Avail." + set widest2 [tcl::mathfunc::max {*}[lmap v [concat [list $title2] $col2items] {string length $v}]] + set col2 [string repeat " " $widest2] + set title3 "Loaded Version" + set widest3 [tcl::mathfunc::max {*}[lmap v [concat [list $title3] $col3items] {string length $v}]] + set col3 [string repeat " " $widest3] + + + set tablewidth [expr {$widest1 + 1 + $widest2 + 1 + $widest3}] + + set table "" + append table [string repeat - $tablewidth] \n + append table "[overtype::left $col1 $title1] [overtype::left $col2 $title2] [overtype::left $col3 $title3]" \n + append table [string repeat - $tablewidth] \n + foreach c1 $col1items c2 $col2items c3 $col3items { + append table "[overtype::left $col1 $c1] [overtype::left $col2 $c2] [overtype::left $col3 $c3]" \n + } + + return $table + + + set loaded_libs [list] + foreach libinfo $all_libs { + if {[string trim $libinfo] eq ""} { + continue + } + set versions [lassign $libinfo libname] + if {[set ver [package provide $libname]] ne ""} { + lappend loaded_libs "$libname $versions (loaded $ver)" + } + } + return [join $loaded_libs \n] + } + + proc info {libname} { + if {[catch {package require natsort}]} { + set has_natsort 0 + } else { + set has_natsort 1 + } + catch {package require frobznodule666} ;#ensure pkg system has loaded/searched for everything + set pkgsknown [package names] + if {[set posn [lsearch $pkgsknown $libname]] >= 0} { + puts stdout "Found package [lindex $pkgsknown $posn]" + } else { + puts stderr "Package not found as available library/module - check tcl::tm::list and \$auto_path" + } + set versions [package versions [lindex $libname 0]] + if {$has_natsort} { + set versions [natsort::sort $versions] + } else { + set versions [lsort $versions] + } + if {![llength $versions]} { + puts stderr "No version numbers found for library/module $libname" + return false + } + puts stdout "Versions of $libname found: $versions" + set alphaposn [lsearch $versions "999999.*"] + if {$alphaposn >= 0} { + set alpha [lindex $versions $alphaposn] + #remove and tack onto beginning.. + set versions [lreplace $versions $alphaposn $alphaposn] + set versions [list $alpha {*}$versions] + } + foreach ver $versions { + set loadinfo [package ifneeded $libname $ver] + puts stdout "$libname $ver" + puts stdout "--- 'package ifneeded' script ---" + puts stdout $loadinfo + puts stdout "---" + } + return + } + + proc copyasmodule {library modulefoldername args} { + set defaults [list -askme 1] + set opts [dict merge $defaults $args] + set opt_askme [dict get $opts -askme] + + if {[catch {package require natsort}]} { + set has_natsort 0 + } else { + set has_natsort 1 + } + + catch {package require frobznodule666} ;#ensure pkg system has loaded/searched for everything + + if {[file pathtype $modulefoldername] eq "absolute"} { + if {![file exists $modulefoldername]} { + error "Path '$modulefoldername' not found. Enter a fully qualified path, or just the tail such as 'modules' if you are within the project to use /src/modules" + } + #use the target folder as the source of projectdir info + set pathinfo [punk::repo::find_repos $modulefoldername] + set projectdir [dict get $pathinfo closest] + set modulefolder_path $modulefoldername + } else { + #use the current working directory as the source of projectdir info + set pathinfo [punk::repo::find_repos [pwd]] + set projectdir [dict get $pathinfo closest] + if {$projectdir ne ""} { + set modulefolders [punk::mix::cli::lib::find_source_module_paths $projectdir] + foreach k [list modules vendormodules] { + set knownfolder [file join $projectdir src $k] + if {$knownfolder ni $modulefolders} { + lappend modulefolders $knownfolder + } + } + set mtails [list] + foreach path $modulefolders { + lappend mtails [file tail $path] + } + + #special case bootsupport/modules so it can be referred to as just bootsupport or bootsupport/modules + lappend modulefolders [file join $projectdir src bootsupport/modules] + + if {$modulefoldername ni $mtails && $modulefoldername ni "bootsupport bootsupport/modules"} { + set msg "Suplied modulefoldername '$modulefoldername' doesn't appear to be a known module folder within the project at $projectdir\n" + append msg "Known module folders: [lsort $mtails]\n" + append msg "Use a name from the above list, or a fully qualified path\n" + error $msg + } + + if {$modulefoldername eq "bootsupport"} { + set modulefoldername "bootsupport/modules" + } + set modulefolder_path [file join $projectdir src $modulefoldername] + } else { + set msg "No current project found at or above current directory\n" + append msg "Supplied modulefoldername '$modulefoldername' is a name or relative path - cannot use when outside a project." \n + append msg "Supply an absolute path for the target modulefolder, or try again from within a project directory" \n + error $msg + } + } + puts stdout "-----------------------------" + if {$projectdir ne ""} { + puts stdout "Using projectdir: $projectdir for lib.copyasmodule" + } else { + puts stdout "No current project." + } + puts stdout "-----------------------------" + if {![file exists $modulefolder_path]} { + error "Selected module folder path '$modulefolder_path' doesn't exist. Required subdirectories for namespaced modules will be created automatically - but base selected folder must exist first" + } + + + set libfound [lsearch -all -inline [package names] $library] + if {[llength $libfound] != 1 || ![string length $libfound]} { + error "Library must match exactly one entry in the list of package names visible to the current interpretor: found '$libfound'" + } + + set versions [package versions [lindex $libfound 0]] + if {$has_natsort} { + set versions [natsort::sort $versions] + } else { + set versions [lsort $versions] + } + if {![llength $versions]} { + error "No version numbers found for library/module $libfound - sorry, you will need to copy it across manually" + } + puts stdout "Versions of $libfound found: $versions" + set alphaposn [lsearch $versions "999999.*"] + if {$alphaposn >= 0} { + set alpha [lindex $versions $alphaposn] + #remove and tack onto beginning.. + set versions [lreplace $versions $alphaposn $alphaposn] + set versions [list $alpha {*}$versions] + } + + set ver [lindex $versions end] ;# todo - make selectable! don't assume tail is latest?.. package vcompare? + if {[llength $versions] > 1} { + puts stdout "Version selected: $ver" + } + + set loadinfo [package ifneeded $libfound $ver] + set loadinfo [string map {\r\n \n} $loadinfo] + set loadinfo_lines [split $loadinfo \n] + if {[catch {llength $loadinfo}]} { + set loadinfo_is_listshaped 0 + } else { + set loadinfo_is_listshaped 1 + } + + #check for redirection to differently cased version of self - this is only detected if this is the only command in the package ifneeded result + #- must have matching version. REVIEW this requirement. Is there a legitimate reason to divert to a differently cased other-version? + set is_package_require_self_recased 0 + set is_package_require_diversion 0 + set lib_diversion_name "" + if {[llength $loadinfo_lines] == 1} { + #e.g Thread 3.0b1 diverts to thread 3.0b1 + set line1 [lindex $loadinfo_lines 0] + #check if multiparted with semicolon + #We need to distinguish "package require ; more stuff" from "package require ver> ;" possibly with trailing comment? + set parts [list] + if {[regexp {;} $line1]} { + foreach p [split $line1 {;}] { + set p [string trim $p] + if {[string length $p]} { + #only append parts with some content that doesn't look like a comment + if {![string match "#*" $p]} { + lappend parts $p + } + } + } + + } + if {[llength $parts] == 1} { + #seems like a lone package require statement. + #check if package require, package\trequire etc + if {[string match "package*require" [lrange $line1 0 1]]} { + set is_package_require_diversion 1 + if {[lindex $line1 2] eq "-exact"} { + #package require -exact + set lib_diversion_name [lindex $line1 3] + #check not an exact match - but is a -nocase match - i.e differs in case only + if {($lib_diversion_name ne $libfound) && [string match -nocase $lib_diversion_name $libfound]} { + if {[lindex $line1 4] eq $ver} { + set is_package_require_self_recased 1 + } + } + } else { + #may be package require + #or package require ??... + set lib_diversion_name [lindex $line1 2] + #check not an exact match - but is a -nocase match - i.e differs in case only + if {($lib_diversion_name ne $libfound) && [string match -nocase $lib_diversion_name $libfound]} { + set requiredversions [lrange $line1 3 end] + if {$ver in $requiredversions} { + set is_package_require_self_recased 1 + } + } + } + } + } + } + + if {$is_package_require_self_recased && [string length $lib_diversion_name]} { + #we only follow one level of package require redirection - seems unlikely/imprudent to follow arbitrarily in a while loop(?) + set libfound $lib_diversion_name + set loadinfo [package ifneeded $libfound $ver] + set loadinfo [string map {\r\n \n} $loadinfo] + set loadinfo_lines [split $loadinfo \n] + if {[catch {llength $loadinfo}]} { + set loadinfo_is_listshaped 0 + } else { + set loadinfo_is_listshaped 1 + } + + + } else { + if {$is_package_require_diversion} { + #single + #for now - we'll abort and tell the user to run again with specified pkg/version + #We could automate - but it seems likely to be surprising. + puts stderr "Loadinfo for $libfound seems to be diverting to another pkg/version: $loadinfo_lines" + puts stderr "Review and consider trying with the pkg/version described in the result above." + return + } + } + + + if {$loadinfo_is_listshaped && ([llength $loadinfo] == 2 && [lindex $loadinfo 0] eq "source")} { + set source_file [lindex $loadinfo 1] + } elseif {[string match "*source*" $loadinfo]} { + set parts [list] + foreach ln $loadinfo_lines { + if {![string length $ln]} {continue} + lappend parts {*}[split $ln ";"] + } + set sources_found [list] + set loads_found [list] + set dependencies [list] + set incomplete_lines [list] + foreach p $parts { + set p [string trim $p] + if {![string length $p]} { + continue ;#empty line or trailing colon + } + if {[string match "*tclPkgSetup*" $p]} { + puts stderr "Unable to process load script for library $libfound" + puts stderr "The library appears to use the deprecated tcl library support utility 'tclPkgSetup'" + return false + } + if {![::info complete $p]} { + # + #probably a perfectly valid script - but slightly more complicated than we can handle + #better to defer to manual processing + lappend incomplete_lines $p + continue + } + if {[lindex $p 0] eq "source"} { + #may have args.. e.g -encoding utf-8 + lappend sources_found [lindex $p end] + } + if {[lindex $p 0] eq "load"} { + lappend loads_found [lrange $p 1 end] + } + if {[lrange $p 0 1] eq "package require"} { + lappend dependencies [lrange $p 2 end] + } + } + if {[llength $incomplete_lines]} { + puts stderr "unable to interpret load script for library $libfound" + puts stderr "Load info: $loadinfo" + return false + } + if {[llength $loads_found]} { + puts stderr "package $libfound appears to have binary components" + foreach l $loads_found { + puts stderr " binary - $l" + } + foreach s $sources_found { + puts stderr " script - $s" + } + puts stderr "Unable to automatically copy binary libraries to your module folder." + return false + } + + if {[llength $sources_found] != 1} { + puts stderr "sorry - unable to interpret source library location" + puts stderr "Only 1 source supported for now" + puts stderr "Load info: $loadinfo" + return false + } + if {[llength $dependencies]} { + #todo - check/ignore if dependency is Tcl ? + puts stderr "WARNING the package appears to depend on at least one other. Review and copy dependencies if required." + foreach d $dependencies { + puts stderr " - $d" + } + } + + set source_file [lindex $sources_found 0] + } else { + puts stderr "sorry - unable to interpret source library location" + puts stderr "Load info: $loadinfo" + return false + } + + # -- --------------------------------------- + #Analyse source file + if {![file exists $source_file]} { + error "Unable to verify source file existence at: $source_file" + } + set source_data [fcat -translation binary $source_file] + if {![string match "*package provide*" $source_data]} { + puts stderr "Sorry - unable to verify source file contains 'package provide' statement of some sort - copy manually" + return false + } else { + if {![string match "*$libfound*" $source_data]} { + # as an exception - look for the specific 'package provide $pkg $version' as occurs in the auto-name auto-version modules + #e.g anyname-0.1.tm example + if {![string match "*package provide \$pkg \$version*" $source_data]} { + puts stderr "Sorry - unable to verify source file contains 'package provide' and '$libfound' - copy manually" + return false + } + } + } + + + if {[string match "*lappend ::auto_path*" $source_data] || [string match "*lappend auto_path*" $source_data] || [string match "*set ::auto_path*" $source_data]} { + puts stderr "Sorry - '$libfound' source file '$source_file' appears to rely on ::auto_path and can't be automatically copied as a .tm module" + puts stderr "Copy the library across to a lib folder instead" + return false + } + # -- --------------------------------------- + + set moduleprefix [punk::ns::nsprefix $libfound] + if {[string length $moduleprefix]} { + set moduleprefix_parts [punk::ns::nsparts $moduleprefix] + set relative_path [file join {*}$moduleprefix_parts] + } else { + set relative_path "" + } + set pkgtail [punk::ns::nstail $libfound] + set target_path [file join $modulefolder_path $relative_path ${pkgtail}-${ver}.tm] + + if {$opt_askme} { + puts stdout "WARNING - you should check that there aren't extra required files for the library/modules" + puts stdout "" + puts stdout "This is not intended for binary modules - use at own risk and check results" + puts stdout "" + puts stdout "Base module path: $modulefolder_path" + puts stdout "Target path : $target_path" + puts stdout "results of 'package ifneeded $libfound'" + puts stdout "---" + puts stdout "$loadinfo" + puts stdout "---" + set question "Proceed to create ${pkgtail}-${ver}.tm module? Y|N" + set answer [punk::lib::askuser $question] ;#takes account of previous stdin state and terminal raw vs line state + if {[string tolower $answer] ne "y"} { + puts stderr "mix libcopy.asmodule aborting due to user response '$answer' (required Y|y to proceed) use -askme 0 to avoid prompts." + return + } + } + + if {![file exists $modulefolder_path]} { + puts stdout "Creating module base folder at $modulefolder_path" + file mkdir $modulefolder_path + } + if {![file exists [file dirname $target_path]]} { + puts stdout "Creating relative folder at [file dirname $target_path]" + file mkdir [file dirname $target_path] + } + + if {[file exists $target_path]} { + puts stdout "WARNING - module already exists at $target_path" + if {$opt_askme} { + set question "Copy anyway? Y|N" + set answer [punk::lib::askuser $question] + if {[string tolower $answer] ne "y"} { + puts stderr "mix libcopy.asmodule aborting due to user response '$answer' (required Y|y to proceed) use -askme 0 to avoid prompts." + return + } + } + } + + file copy -force $source_file $target_path + + return $target_path + } + + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::loadedlib [namespace eval punk::mix::commandset::loadedlib { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm new file mode 100644 index 00000000..9955c53b --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm @@ -0,0 +1,518 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'deck make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::module 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::repo +# depends on punk,punk::mix::base,punk::mix::cli + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::module { + namespace export * + + proc paths {} { + set roots [punk::repo::find_repos ""] + set project [lindex [dict get $roots project] 0] + if {$project ne ""} { + set is_project 1 + set searchbase $project + } else { + set is_project 0 + set searchbase [pwd] + } + + if {[catch { + set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $searchbase] + } errMsg]} { + set source_module_folderlist [list] + } + + set tm_folders [tcl::tm::list] + package require overtype + + set result "" + if {$is_project} { + append result "Project module source paths:" \n + foreach f $source_module_folderlist { + append result "$f" \n + } + } + append result \n + append result "tcl::tm::list" \n + foreach f $tm_folders { + if {$is_project} { + if {[punk::mix::cli::lib::path_a_aboveorat_b $project $f]} { + set pinfo "(within project)" + } else { + set pinfo "" + } + } else { + set pinfo "" + } + set warning "" + if {![file isdirectory $f]} { + set warning "(PATH NOT FOUND)" + } + append result "$f $pinfo $warning" \n + } + + + return $result + } + #require current dir when calling to be the projectdir, or + proc templates {args} { + set tdict_low_to_high [templates_dict {*}$args] + #convert to screen order - with higher priority at the top + set tdict [dict create] + foreach k [lreverse [dict keys $tdict_low_to_high]] { + dict set tdict $k [dict get $tdict_low_to_high $k] + } + + package require overtype + package require textblock + #set pathinfolist [dict values $tdict] + #set paths [lsearch -all -inline -index 1 -subindices $pathinfolist *] ;#relies on first key of templates_dict being path + + set names [dict keys $tdict] + set paths [list] + set pathtypes [list] + dict for {nm tinfo} $tdict { + lappend paths [dict get $tinfo path] + lappend pathtypes [dict get $tinfo sourceinfo pathtype] + } + + set title(path) "Path" + set widest(path) [tcl::mathfunc::max {*}[lmap v [concat [list $title(path)] $paths] {punk::strlen $v}]] + set col(path) [string repeat " " $widest(path)] + + set title(pathtype) "[a+ green]Path Type[a]" + set widest(pathtype) [tcl::mathfunc::max {*}[lmap v [concat [list $title(pathtype)] $pathtypes] {string length $v}]] + set col(pathtype) [string repeat " " $widest(pathtype)] + + set title(name) "Template Name" + set widest(name) [tcl::mathfunc::max {*}[lmap v [concat [list $title(name)] $names] {string length $v}]] + set col(name) [string repeat " " $widest(name)] + + set tablewidth [expr {$widest(name) + 1 + $widest(pathtype) + 1 + $widest(name)}] + set table "" + append table [string repeat - $tablewidth] \n + append table "[textblock::join -- [overtype::left $col(name) $title(name)] " " [overtype::left $col(pathtype) $title(pathtype)] " " [overtype::left $col(path) $title(path)]]" \n + append table [string repeat - $tablewidth] \n + + foreach n $names pt $pathtypes p $paths { + append table "[overtype::left $col(name) $n] [overtype::left $col(pathtype) $pt] [overtype::left $col(path) $p]" \n + } + + return $table + } + #return all module templates with repeated ones suffixed with .2 .3 etc + proc templates_dict {args} { + set argspec { + *proc -name templates_dict -help "Templates from module and project paths" + -startdir -default "" -help "Project folder used in addition to module paths" + -not -default "" -multiple 1 + *values + globsearches -default * -multiple 1 + } + set argd [punk::args::get_dict $argspec $args] + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + set template_folder_dict [punk::cap::call_handler punk.templates get_itemdict_moduletemplates {*}$args] + } else { + put stderr "get_template_basefolders WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide template locations" + } + } + proc new {args} { + set year [clock format [clock seconds] -format %Y] + set moduletypes [punk::mix::cli::lib::module_types] + # use \uFFFD because unicode replacement char should consistently render as 1 wide + set argspecs [subst { + -project -default \uFFFD + -version -default \uFFFD + -license -default + -template -default punk.module + -type -default \uFFFD -choices {$moduletypes} + -force -default 0 -type boolean + -quiet -default 0 -type boolean + *values -min 1 -max 1 + module -type string + }] + set argd [punk::args::get_dict $argspecs $args] + lassign [dict values $argd] opts values + set module [dict get $values module] + + #set opts [dict merge $defaults $args] + + #todo - review compatibility between -template and -type + #-type is the wrapping technology e.g 'plain' for none or tarjar or zip (modpod) etc (consider also snappy/snappy-tcl) + #-template may be a folder - but only if the selected -type suports it + + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + # option -version + # we need this value before looking at the named argument + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_version_supplied [dict get $opts -version] + if {$opt_version_supplied eq "\uFFFD"} { + set opt_version "0.1.0" + } else { + set opt_version $opt_version_supplied + if {![util::is_valid_tm_version $opt_version]} { + error "deck module.new error - supplied -version $opt_version doesn't appear to be a valid Tcl module version" + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #named argument + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set mversion_supplied "" ;#version supplied directly in module argument + if {[string first - $module]> 0} { + #if it has a dash then version is required to be valid + lassign [punk::mix::cli::lib::split_modulename_version $module] modulename mversion + if {![util::is_valid_tm_version $mversion]} { + error "deck module.new error - unable to determine modulename-version from supplied value '$module'" + } + set mversion_supplied $mversion ;#record as may need to compare to version from templatefile name + set vcompare_is_mversion_bigger [package vcompare $mversion $opt_version] + if {$vcompare_is_mversion_bigger > 0} { + set opt_version $mversion; #module parameter has higher value than -version + set vmsg "from module argument: $module" + } else { + set vmsg "from -version option: $opt_version_supplied" + } + if {$opt_version_supplied ne "\uFFFD"} { + if {$vcompare_is_mversion_bigger != 0} { + #is bigger or smaller + puts stderr "module.new WARNING: version supplied in module argument as well as -version option. Using the higher version number $vmsg" + } + } + } else { + set modulename $module + } + punk::mix::cli::lib::validate_modulename $modulename -errorprefix "punk::mix::commandset::module::new" + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + #options + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_project [dict get $opts -project] + set testdir [pwd] + if {![string length [set projectdir [punk::repo::find_project $testdir]]]} { + if {![string length [set projectdir [punk::repo::find_candidate $testdir]]]} { + set msg [punk::repo::is_candidate_root_requirements_msg] + error "module.new unable to create module in projectdir:$projectdir - directory doesn't appear to meet basic standards $msg" + } + } + if {$opt_project == "\uFFFF"} { + set projectname [file tail $projectdir] + } else { + set projectname $opt_project + if {$projectname ne [file tail $projectdir]} { + error "module.new -project '$opt_project' doesn't match detected projectname '$projectname' at path: $projectdir" + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_license [dict get $opts -license] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_template [dict get $opts -template] + if {[regexp {.*[?*].*} $opt_template]} { + error "module.new -template does not support glob chars. Use an exact full name including version (and optionally .tm) - or use just the name without version or .tm, and the latest version will be selected" + } + + set templates_dict [templates_dict] ;#keys are possibly prefixed with . and/or suffixed with #2 #3 etc if there are collisions - the remaining unsuffixed being the one with highest preference + #todo - allow versionless name - pick latest which isn't suffixed with #2 etc + #if the user wants to exactly match an unversioned template, in the presence of versioned ones - they may need to include the trailing .tm + if {[dict exists $templates_dict $opt_template]} { + #exact long name (possibly including version) + #Note - an unversioned .tm template will be matched here - even though versioned templates of the same name may exist. + set templatefile [dict get $templates_dict $opt_template path] + set templatefile_info [dict get $templates_dict $opt_template sourceinfo] + } else { + #if it wasn't an exact match for opt_template - then opt_template now shouldn't contain a version (we have also ruled out glob chars * & ? above) + #(if it does - then we just won't find anything - which is fine) + #module file name could contain dots - but only one dash - if it is versioned + + set matches [lsearch -all -inline [dict keys $templates_dict] $opt_template-*] ;#the key is of form vendor.modulename-version(#suffix) (version optional, suffix if lower precedence with same name was found) + #only .tm (or .TM .Tm .tM) files make it into the templates_dict - they are allowed to be unversioned though. + set key_version_list [list] + foreach m $matches { + #vendorname could contain dashes or dots - so easiest way to split out is to examine the stored vendor value in sourceinfo + set vendor [dict get $templates_dict $m sourceinfo vendor] + if {$vendor ne "_project"} { + #_project special case - not included in module names + set module $m + } else { + set module [string range [string length $vendor.] end] + } + lassign [punk::mix::cli::lib::split_modulename_version $m] _tailmname mversion + lappend key_version_list [list $m $mversion] + } + if {[llength $matches]} { + set highest_m "" + set highest_v "" + foreach kv $key_version_list { + if {$highest_v eq ""} { + set highest_m [lindex $kv 0] + set highest_v [lindex $kv 1] + } else { + if {[package vcompare $highest_v [lindex $kv 1]] == -1} { + set highest_m [lindex $kv 0] + set highest_v [lindex $kv 1] + } + } + } + set templatefile [dict get $templates_dict $highest_m path] + set templatefile_info [dict get $templates_dict $highest_m sourceinfo] + } else { + error "module.new unable to find template '$opt_template'. [dict size $templates_dict] Known templates. Use deck module.templates to display" + } + } + + + set tpldir [file dirname $templatefile] ;#use same folder for modulename_buildversion.txt, modulename_description.txt if they exist + + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_type [dict get $opts -type] + if {$opt_type eq "\uFFFD"} { + set opt_type [lindex [punk::mix::cli::lib::module_types] 0] ;#default to plain + } + if {$opt_type ni [punk::mix::cli::lib::module_types]} { + error "module.new - error - unknown -type '$opt_type' known-types: [punk::mix::cli::lib::module_types]" + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_quiet [dict get $opts -quiet] + set opt_force [dict get $opts -force] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + + + + + set subpath [punk::mix::cli::lib::module_subpath $modulename] ;#commonly empty string for simple modulename e.g "mymodule" but x::mymodule has subpath 'x' and x::y::mymodule has subpath 'x/y' + if {![string length $subpath]} { + set modulefolder $projectdir/src/modules + } else { + set modulefolder $projectdir/src/modules/$subpath + } + file mkdir $modulefolder + + set moduletail [namespace tail $modulename] + set magicversion [punk::mix::util::magic_tm_version] ;#deliberately large so given load-preference when testing + + + + + set template_tail [file tail $templatefile] ;#convert template_xxx-version.tm.x to {xxx version} + set template_tail [string range $template_tail [string length template_] end] + set ext [string tolower [file extension $template_tail]] + if {$ext eq ".tm"} { + set template_modulename_part [file rootname $template_tail] + } elseif {[string is integer -strict [string range $ext 1 end]]} { + #something like modulename-0.0.1.tm.2 + #strip of last 2 dotted parts + set shortened [file rootname $template_tail] + if {![string equal -nocase [file extension $shortened] ".tm"]} { + error "module.new error: Unable to interpret filename components of template file '$templatefile' (expected .tm as second-last or last component)" + } + set template_modulename_part [file rootname $shortened] + } else { + error "module.new error: Unable to interpret filename components of template file '$templatefile'" + } + lassign [punk::mix::cli::lib::split_modulename_version $template_modulename_part] t_mname t_version + #t_version may be empty string if template is unversioned e.g template_whatever.tm + + set fd [open $templatefile r]; set template_filedata [read $fd]; close $fd + if {[string match "*$magicversion*" $template_filedata]} { + set use_magic 1 + set build_version $opt_version + set infile_version $magicversion + } else { + set use_magic 0 + if {$opt_version_supplied ne "\uFFFF"} { + set build_version $opt_version + } else { + # + + if {[util::is_valid_tm_version $t_version]} { + if {$mversion_supplied eq ""} { + set build_version $t_version + } else { + #we have a version from the named argument 'module' + if {[package vcompare $mversion_supplied $t_version] > 0} { + set build_version $mversion_supplied + } else { + set build_version $t_version + } + } + } else { + #probably an unversioned module template + #use opt_version default from above + set build_version $opt_version + } + } + set infile_version $build_version + } + + set moduletemplate [file join $projectname [punk::path::relative $projectdir $templatefile]] ;#if templatfile is on another volume - just $templatefile will be returned. + #moduletemplate should usually be a relative path - but could be absolute, or contain info about the relative locations of projectdir vs templatefile if template comes from another project or a module outside the project + #This path info may be undesired in the template output (%moduletemplate%) + #it is nevertheless useful information - and not the only way developer-machine/build-machine paths can leak + #for now the user has the option to override any templates and remove %moduletemplate% if it is a security/privacy concern + + #Don't put literal %x% in the code for the commandset::module itself - to stop them being seen by layout scanner as replacable tokens + set tagnames [list moduletemplate $moduletemplate project $projectname pkg $modulename year $year license $opt_license version $infile_version] + set strmap [list] + foreach {tag val} $tagnames { + lappend strmap %$tag% $val + } + set template_filedata [string map $strmap $template_filedata] + + set tmfile $modulefolder/${moduletail}-$infile_version.tm + set podfile $modulefolder/#modpod-$moduletail-$infile_version/$moduletail-$infile_version.tm + set has_tm [file exists $tmfile] + set has_pod [file exists $podfile] + if {$has_tm && $has_pos} { + #invalid configuration - bomb out + error "module.new error: Invalid target configuration found. module folder has both a .tm file $tmfile and a modpod file $podfile. Please delete one of them before trying again." + } + if {$opt_type eq "plain"} { + set modulefile $tmfile + } else { + set modulefile $podfile + } + if {$has_tm || $has_pod} { + if {!$opt_force} { + if {$has_tm} { + set errmsg "module.new error: module file $tmfile already exists - aborting" + } else { + set errmsg "module.new error: module file $podfile already exists - aborting" + } + if {[string match "*$magicversion*" $tmfile]} { + append errmsg \n "If you are attempting to create a module file with a specific version in the source-file name - you will need to use a template that doesn't contain the string '$magicversion' e.g the provided template moduleexactversion-0.0.1.tm" + } + error $errmsg + } else { + #review - prompt here vs caller? + #we are committed to overwriting/replacing if there was a pre-existing module of same version + if {$has_pod} { + file delete -force [file dirname $podfile] + } elseif {$has_tm} { + file delete -force $tmfile + } + } + } + + + if {[file exists $tpldir/modulename_buildversion.txt]} { + set fd [open $tpldir/modulename_buildversion.txt r]; set buildversion_filedata [read $fd]; close $fd + } else { + #mix_templates_dir warns of deprecation - review + set lib_tpldir [file join [punk::mix::cli::lib::mix_templates_dir] modules];#fallback for modulename_buildversion.txt, modulename_description.txt + set fd [open $lib_tpldir/modulename_buildversion.txt r]; set buildversion_filedata [read $fd]; close $fd + } + set buildversionfile [file join $modulefolder ${moduletail}-buildversion.txt] + set existing_build_version "" + if {[file exists $buildversionfile]} { + set buildversiondata [punk::mix::util::fcat $buildversionfile] + set lines [split $buildversiondata \n] + set existing_build_version [string trim [lindex $lines 0]] + if {[package vcompare $existing_build_version $build_version] >= 0} { + #existing version in -buildversion.txt file is lower than the module version we are creating + error "module.new error: there is an existing buildversion file $buildversionfile with version $existing_build_version equal to or higher than $build_version - unable to continue" + } + } + + set existing_tm_versions [glob -nocomplain -dir $modulefolder -tails ${moduletail}-*.tm] + #it shouldn't be possible to overmatch with the glob - because '-' is not valid in a Tcl module name + set existing_pod_versions [glob -nocomplain -dir $modulefolder -tails #modpod-$moduletail-*] + set existing_versions [concat $existing_tm_versions $existing_pod_versions] + + if {[llength $existing_versions]} { + set name_version_pairs [list] + lappend name_version_pairs [list $moduletail $infile_version] + foreach existing $existing_versions { + lassign [punk::mix::cli::lib::split_modulename_version $existing] namepart version ;# .tm is stripped and ignored + if {[string match #modpod-* $namepart]} { + set namepart [string range $namepart 8 end] + } + lappend name_version_pairs [list $namepart $version] + } + set name_version_pairs [lsort -command {package vcompare} -index 1 $name_version_pairs] ;#while plain lsort will often work with versions - it can get order wrong with some - so use package vcompare + if {[lindex $name_version_pairs end] ne [list $moduletail $infile_version]} { + set thisposn [lsearch -index 1 $name_version_pairs $infile_version] + set name_version_pairs [lreplace $name_version_pairs $thisposn $thisposn] + set other_versions [lsearch -all -inline -index 1 -subindices $name_version_pairs *] + set errmsg "module.new error: There are existing modules in the target folder with higher versions than $infile_version." + append errmsg \n "Other versions found: $other_versions" + if {$magicversion in $other_versions} { + append errmsg \n "Existing build version for special source file name: '$magicversion' is: '$existing_build_version'" + append errmsg \n "If '$magicversion' file doesn't represent the latest source it should be removed or the filename and contents adjusted to be a specific version" + } + error $errmsg + } else { + puts stderr "module.new WARNING: There are existing modules in the target folder with lower versions than $infile_version - manual review recommended" + puts stderr "Other versions found: [lsearch -all -inline -index 1 -subindices [lrange $name_version_pairs 0 end-1] *]" + } + } + + if {!$opt_quiet} { + puts stdout "Creating $modulefile from template $moduletemplate" + } + file mkdir [file dirname $modulefile] + + set fd [open $modulefile w] + fconfigure $fd -translation binary + puts -nonewline $fd $template_filedata + close $fd + + + set buildversion_filedata [string map [list %Major.Minor.Level% $build_version] $buildversion_filedata] + set fd [open $buildversionfile w] + fconfigure $fd -translation binary + puts -nonewline $fd $buildversion_filedata + close $fd + + return [list file $modulefile version $build_version] + } + + namespace eval lib { + + + } + + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::module [namespace eval punk::mix::commandset::module { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm new file mode 100644 index 00000000..9cac531c --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/project-0.1.0.tm @@ -0,0 +1,1026 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'deck make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::project 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::mix::commandset::project 0 0.1.0] +#[copyright "2023"] +#[titledesc {dec commandset - project}] [comment {-- Name section and table of contents description --}] +#[moddesc {deck CLI commandset - project}] [comment {-- Description at end of page heading --}] +#[require punk::mix::commandset::project] +#[description] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::mix::commandset::project +#[para]Import into an ensemble namespace similarly to the way it is done with punk::mix::cli e.g +#[example { +# namespace eval myproject::cli { +# namespace export * +# namespace ensemble create +# package require punk::overlay +# +# package require punk::mix::commandset::project +# punk::overlay::import_commandset project . ::punk::mix::commandset::project +# punk::overlay::import_commandset projects . ::punk::mix::commandset::project::collection +# } +#}] +#[para] Where the . in the above example is the prefix/command separator +#[para]The prefix ('project' in the above example) can be any string desired to disambiguate commands imported from other commandsets. +#[para]The above results in the availability of the ensemble command: ::myproject::cli project.new, which is implemented in ::punk::mix::commandset::project::new +#[para]Similarly, procs under ::punk::mix::commandset::project::collection will be available as subcommands of the ensemble as projects. +#[para] +#[subsection Concepts] +#[para] see punk::overlay + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::mix::commandset::project +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package punk::ns] +#[item] [package sqlite3] (binary) +#[item] [package overtype] +#[item] [package textutil] (tcllib) + + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::project { + namespace export * + #*** !doctools + #[subsection {Namespace punk::mix::commandset::project}] + #[para] core commandset functions for punk::mix::commandset::project + #[list_begin definitions] + + proc _default {} { + package require punk::ns + set dispatched_to [lindex [info level 2] 0] ;#e.g ::punk::mix::cli::project + set dispatch_tail [punk::ns::nstail $dispatched_to] + set dispatch_ensemble [punk::ns::nsprefix $dispatched_to] ;#e.g ::punk::mix::cli + set sibling_commands [namespace eval $dispatch_ensemble {namespace export}] + #todo - get separator? + set sep "." + set result [list] + foreach sib $sibling_commands { + if {[string match ${dispatch_tail}${sep}* $sib]} { + lappend result $sib + } + } + return [lsort $result] + } + + + + + proc new {newprojectpath_or_name args} { + #*** !doctools + # [call [fun new] [arg newprojectpath_or_name] [opt args]] + #new project structure - may be dedicated to one module, or contain many. + #create minimal folder structure only by specifying in args: -modules {} + if {[file pathtype $newprojectpath_or_name] eq "absolute"} { + set projectfullpath [file normalize $newprojectpath_or_name] + set projectname [file tail $projectfullpath] + set projectparentdir [file dirname $newprojectpath_or_name] + } else { + set projectfullpath [file join [pwd] $newprojectpath_or_name] + set projectname [file tail $projectfullpath] + set projectparentdir [file dirname $projectfullpath] + } + if {[file type $projectparentdir] ne "directory"} { + error "punk::mix::cli::new error: unable to determine containing folder for '$newprojectpath_or_name'" + } + + punk::mix::cli::lib::validate_projectname $projectname -errorprefix "punk mix project.new" + + + set defaults [list\ + -type plain\ + -empty 0\ + -force 0\ + -update 0\ + -confirm 1\ + -modules \uFFFF\ + -layout punk.project + ] ;#todo + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "project.new error: option '$k' not known. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_type [dict get $opts -type] + if {$opt_type ni [punk::mix::cli::lib::module_types]} { + error "deck new error - unknown type '$opt_type' known types: [punk::mix::cli::lib::module_types]" + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_force [dict get $opts -force] + set opt_confirm [string tolower [dict get $opts -confirm]] + # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_modules [dict get $opts -modules] + if {[llength $opt_modules] == 1 && [lindex $opt_modules 0] eq "\uFFFF"} { + #if not specified - add a single module matching project name + set opt_modules [list $projectname] + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_layout [dict get $opts -layout] + set opt_update [dict get $opts -update] + # -- --- --- --- --- --- --- --- --- --- --- --- --- + + + #todo - install support binaries on a per-project basis in a way that doesn't impact machine (e.g not added to path) - cache in user config dir if possible, supply mechanism to clear cache + # + set fossil_prog [auto_execok fossil] + if {![string length $fossil_prog]} { + puts stderr "The fossil program was not found. A fossil executable is required to use most deck features." + if {[string length [set scoop_prog [auto_execok scoop]]]} { + #restrict to windows? + set answer [util::askuser "scoop detected. Would you like deck to install fossil now using scoop? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + #we don't assume 'unknown' is configured to run shell commands + if {[string length [package provide shellrun]]} { + set exitinfo [run {*}$scoop_prog install fossil] + #scoop tends to return successful exitcode (0) even when packages not found etc. - so exitinfo not much use. + puts stdout "scoop install fossil ran with result: $exitinfo" + } else { + puts stdout "Please wait while scoop runs - there may be a slight delay and then scoop output will be shown. (use punk shellrun package for )" + set result [exec {*}$scoop_prog install fossil] + puts stdout $result + } + catch {::auto_reset} ;#can be missing (unsure under what circumstances - but I've seen it raise error 'invalid command name "auto_reset"') + if {![string length [auto_execok fossil]]} { + puts stderr "Fossil still not detected. If it was successfully installed, try restarting your punk/tcl shell." + return + } + #todo - ask user if they want to configure fosssil first.. + set answer [util::askuser "Fossil command now appears to be available. You may wish to answer N to exit and customize it - but default config may be ok. Type the word 'continue' to proceed with default configuration."] + if {[string tolower $answer] ne "continue"} { + return + } + + } else { + puts stdout "See: https://fossil-scm.org/home/uv/download.html" + if {"windows" eq $::tcl_platform(platform)} { + puts stdout "Consider using a package manager such as scoop: https://scoop.sh" + puts stdout "(Then: scoop install fossil)" + } + return + } + } + set startdir [pwd] + if {[set in_project [punk::repo::find_project $startdir]] ne ""} { + # use this project as source of templates + puts stdout "-------------------------------------------" + puts stdout "Currently in a project directory '$in_project'" + puts stdout "This project will be searched for templates" + puts stdout "-------------------------------------------" + } + + + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + set layout_dict [punk::cap::call_handler punk.templates get_itemdict_projectlayouts] + } else { + put stderr "commandset::project::new WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide layout locations" + return + } + if {[dict exists $layout_dict $opt_layout]} { + set layout_name $opt_layout + set layout_info [dict get $layout_dict $layout_name] + set layout_path [dict get $layout_info path] + set layout_sourceinfo [dict get $layout_info sourceinfo] + } else { + puts stderr "commandset::project::new - no exact match for specified layout-name $opt_layout found" + puts stderr "layout names found: [dict keys $layout_dict]" + return + + #todo - pick highest version layout that matches opt_layout if version not specified but multiple exist + + #set layout_name ... + #set layout_info .. + #set layout_path ... + } + + + + #todo - detect whether inside cwd-project or inside a different project + set projectdir $projectparentdir/$projectname + if {[set target_in_project [punk::repo::find_project $projectparentdir]] ne ""} { + puts stderr "Target location for new project is already within a project: $target_in_project" + error "Nested projects not yet supported aborting" + } + + + + if {[punk::repo::is_git $projectparentdir]} { + puts stderr "mix new WARNING: target project location is within a git repo based at [punk::repo::find_git $projectparentdir]" + puts stderr "The new project will create a fossil repository (which you are free to ignore - but but will be used to confirm project base)" + puts stderr "If you intend to use both git and fossil in the same project space - you should research and understand the details and any possible interactions/issues" + set answer [util::askuser "Do you want to proceed to create a project based at: $projectdir? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + } + set is_nested_fossil 0 ;#default assumption + if {[punk::repo::is_fossil $projectparentdir]} { + puts stderr "mix new WARNING: target project location is within an open fossil repo based at [punk::repo::find_fossil $projectparentdir] NESTED fossil repository" + if {$opt_confirm ni [list 0 no false]} { + puts stderr "If you proceed - the new project's fossil repo will be created using the --nested flag" + set answer [util::askuser "Do you want to proceed to create a NESTED project based at: $projectdir? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + set is_nested_fossil 1 + } + } + + + set project_dir_exists [file exists $projectdir] + if {$project_dir_exists && !($opt_force || $opt_update)} { + puts stderr "Unable to create new project at $projectdir - file/folder already exists use -update 1 to fill in missing items from template use -force 1 to overwrite from template" + return + } elseif {$project_dir_exists && $opt_force} { + puts stderr "mix new WARNING: -force 1 was supplied. Will copy layout $layout_path using -force option to overwrite from template" + if {$opt_confirm ni [list 0 no false]} { + set answer [util::askuser "Do you want to proceed to possibly overwrite existing files in $projectdir? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -confirm 0 to avoid prompts." + return + } + } + } elseif {$project_dir_exists && $opt_update} { + puts stderr "mix new WARNING: -update 1 was supplied. Will copy layout $layout_path using -update option to add missing items" + } + + set fossil_repo_file "" + set is_fossil_root 0 + if {$project_dir_exists && [punk::repo::is_fossil_root $projectdir]} { + set is_fossil_root 1 + set fossil_repo_file [punk::repo::fossil_get_repository_file $projectdir] + if {$fossil_repo_file ne ""} { + set repodb_folder [file dirname $fossil_repo_file] + } + } + + if {$fossil_repo_file eq ""} { + set repodb_folder [punk::repo::fossil_get_repository_folder_for_project $projectname -parentfolder $startdir] + if {![string length $repodb_folder]} { + puts stderr "No usable repository database folder selected for $projectname.fossil file" + return + } + } + if {[file exists $repodb_folder/$projectname.fossil]} { + puts stdout "NOTICE: $repodb_folder/$projectname.fossil already exists" + if {!($opt_force || $opt_update)} { + puts stderr "-force 1 or -update 1 not specified - aborting" + return + } + #review + set fossil_repo_file $repodb_folder/$projectname.fossil + } + + if {$fossil_repo_file eq ""} { + puts stdout "Initialising fossil repo: $repodb_folder/$projectname.fossil" + set fossilinit [runx -n {*}$fossil_prog init $repodb_folder/$projectname.fossil -project-name $projectname] + if {[dict get $fossilinit exitcode] != 0} { + puts stderr "fossil init failed:" + puts stderr [dict get $fossilinit stderr] + return + } else { + puts stdout "fossil init result:" + puts stdout [dict get $fossilinit stdout] + } + } + + # + + + + file mkdir $projectdir + + puts stdout ">>> about to call punkcheck::install $layout_path $projectdir" + set resultdict [dict create] + set antipaths [list\ + src/doc/*\ + src/doc/include/*\ + src/PROJECT_LAYOUTS_*\ + ] + + #set antiglob_dir [list\ + # _ignore_*\ + #] + set antiglob_dir [list\ + ] + + #default antiglob_dir_core will stop .fossil* from being updated - which is generally desirable as these are likely to be customized + if {$opt_force} { + puts stdout "copying layout files - with force applied - overwrite all-targets" + set resultdict [punkcheck::install $layout_path $projectdir -installer project.new -overwrite ALL-TARGETS -antiglob_paths $antipaths -antiglob_dir $antiglob_dir] + } else { + puts stdout "copying layout files - (if source file changed)" + set resultdict [punkcheck::install $layout_path $projectdir -installer project.new -overwrite installedsourcechanged-targets -antiglob_paths $antipaths -antiglob_dir $antiglob_dir] + } + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + + puts stdout "copying layout src/doc files (if target missing)" + set resultdict [punkcheck::install $layout_path/src/doc $projectdir/src/doc -punkcheck_folder $projectdir -installer project.new -overwrite SYNCED-TARGETS] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + + #target folders .fossil-custom and .fossil-settings may not exist. use -createdir 1 to ensure existence. + #In this case we need to override the default dir antiglob - as .fossil-xxx folders need to be installed from template if missing, or if target is uncustomized. + ## default_antiglob_dir_core [list "#*" "_aside" ".git" ".fossil*"] + set override_antiglob_dir_core [list #* _aside .git] + puts stdout "copying layout src/.fossil-custom files (if target missing or uncustomised)" + set resultdict [punkcheck::install $layout_path/.fossil-custom $projectdir/.fossil-custom -createdir 1 -punkcheck_folder $projectdir -installer project.new -antiglob_dir_core $override_antiglob_dir_core -overwrite SYNCED-TARGETS] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + + puts stdout "copying layout src/.fossil-settings files (if target missing or uncustomised)" + set resultdict [punkcheck::install $layout_path/.fossil-settings $projectdir/.fossil-settings -createdir 1 -punkcheck_folder $projectdir -installer project.new -antiglob_dir_core $override_antiglob_dir_core -overwrite SYNCED-TARGETS] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + + #scan all files in template + # + #TODO - deck command to substitute templates? + set templatefiles [punk::mix::commandset::layout::lib::layout_scan_for_template_files $opt_layout] + set stripprefix [file normalize $layout_path] + + set tagmap [list [lib::template_tag project] $projectname] + if {[llength $templatefiles]} { + puts stdout "Filling template file placeholders with the following tag map:" + foreach {placeholder value} $tagmap { + puts stdout " $placeholder -> $value" + } + } + foreach templatefullpath $templatefiles { + set templatetail [punk::repo::path_strip_alreadynormalized_prefixdepth $templatefullpath $stripprefix] + + set fpath [file join $projectdir $templatetail] + if {[file exists $fpath]} { + set fd [open $fpath r]; fconfigure $fd -translation binary; set data [read $fd]; close $fd + set data2 [string map $tagmap $data] + if {$data2 ne $data} { + puts stdout "updated template file: $fpath" + set fdout [open $fpath w]; fconfigure $fdout -translation binary; puts -nonewline $fdout $data2; close $fdout + } + } else { + puts stderr "warning: Missing template file $fpath" + } + } + #todo - tag substitutions in src/doc tree + + ::cd $projectdir + + if {[file exists $projectdir/src/modules]} { + foreach m $opt_modules { + #check if mod-ver.tm file or #modpod-mod-ver folder exist + set tmfile $projectdir/src/modules/$m-[punk::mix::util::magic_tm_version].tm + set podfile $projectdir/src/modules/#modpod-$m-[punk::mix::util::magic_tm_version]/$m-[punk::mix::util::magic_tm_version].tm + + set has_tm [file exists $tmfile] + set has_pod [file exists $podfile] + #puts stderr "=====> has_tm: $has_tm has_pod: $has_pod" + if {!$has_tm && !$has_pod} { + #todo - option for -module_template - and check existence at top? or change opt_modules to be a list of dicts with configuration info -template -type etc + punk::mix::commandset::module::new -project $projectname -type $opt_type $m + } else { + #we should rarely if ever want to force any src/modules to be overwritten + if {$opt_force} { + if {$has_pod} { + set answer [util::askuser "OVERWRITE the src/modules file $podfile ?? (generally not desirable) Y|N"] + set overwrite_type zip + } else { + set answer [util::askuser "OVERWRITE the src/modules file $tmfile ?? (generally not desirable) Y|N"] + set overwrite_type $opt_type + } + if {[string tolower $answer] eq "y"} { + #REVIEW - all pods zip - for now + punk::mix::commandset::module::new -project $projectname -type $overwrite_type -force 1 $m + } + } + } + } + } else { + puts stderr "project.new WARNING template hasn't created src/modules - skipping creation of new module(s) for project" + } + + #generate www/man/md output in 'embedded' folder which should be checked into repo for online documentation + if {[file exists $projectdir/src]} { + ::cd $projectdir/src + #---------- + set installer [punkcheck::installtrack new project.new $projectdir/src/.punkcheck] + $installer set_source_target $projectdir/src/doc $projectdir/src/embedded + set event [$installer start_event {-install_step kettledoc}] + $event targetset_init VIRTUAL kettle_build_doc ;#VIRTUAL - since there is no specific target file - and we don't know all the files that will be generated + $event targetset_addsource $projectdir/src/doc ;#whole doc tree is considered the source + #---------- + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "BUILDING DOCS at src/embedded from src/doc" + if {[catch { + + punk::mix::cli::lib::kettle_call lib doc + #Kettle doc + + } errM]} { + $event targetset_end FAILED -note "kettle_build_doc failed: $errM" + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "No change detected in src/doc" + $event targetset_end SKIPPED + } + $event end + $event destroy + $installer destroy + } + + ::cd $projectdir + + if {![punk::repo::is_fossil_root $projectdir]} { + set first_fossil 1 + #-k = keep. (only modify the manifest file(s)) + if {$is_nested_fossil} { + set fossilopen [runx -n {*}$fossil_prog open --nested $repodb_folder/$projectname.fossil -k --workdir $projectdir] + } else { + set fossilopen [runx -n {*}$fossil_prog open $repodb_folder/$projectname.fossil -k --workdir $projectdir] + } + if {[file exists $projectdir/_FOSSIL_] && ![file exists $projectdir/.fslckout]} { + file rename $projectdir/_FOSSIL_ $projectdir/.fslckout + } + if {[dict get $fossilopen exitcode] != 0} { + puts stderr "fossil open in project workdir '$projectdir' FAILED:" + puts stderr [dict get $fossilopen stderr] + return + } else { + puts stdout "fossil open in project workdir '$projectdir' OK:" + puts stdout [dict get $fossilopen stdout] + } + } else { + set first_fossil 0 + } + set fossiladd [runx -n {*}$fossil_prog add --dotfiles $projectdir] + if {[dict get $fossiladd exitcode] != 0} { + puts stderr "fossil add workfiles in workdir '$projectdir' FAILED:" + puts stderr [dict get $fossiladd stderr] + return + } else { + puts stdout "fossil add workfiles in workdir '$projectdir' OK:" + puts stdout [dict get $fossiladd stdout] + } + if {$first_fossil} { + #fossil commit may prompt user for input.. runx runout etc will pause with no prompts + util::do_in_path $projectdir { + set fossilcommit [run -n {*}$fossil_prog commit -m "initial project commit"] + } + if {[dict get $fossilcommit exitcode] != 0} { + puts stderr "fossil commit in workdir '$projectdir' FAILED" + return + } else { + puts stdout "fossil commit in workdir '$projectdir' OK" + } + } + + puts stdout "-done- project:$projectname projectdir: $projectdir" + } + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::mix::commandset::project ---}] + + namespace eval collection { + #*** !doctools + #[subsection {Namespace punk::mix::commandset::project::collection}] + #[para] commandset functions for operating with multiple projects. + #[para] It would usually be imported with the prefix "projects" and separator "." to result in commands such as: projects.detail + #[list_begin definitions] + namespace export * + namespace path [namespace parent] + + #e.g imported as 'projects' + proc _default {{glob {}} args} { + #*** !doctools + #[call [fun _default] [arg glob] [opt {option value...}]] + #[para]List projects under fossil management, showing fossil db location and number of checkouts + #[para]The glob argument is optional unless option/value pairs are also supplied, in which case * should be explicitly supplied + #[para]glob restricts output based on the name of the fossil db file e.g s* for all projects beginning with s + #[para]The _default function is made available in the ensemble by the name of the prefix used when importing the commandset. + #[para]e.g + #[para] punk::overlay::import_commandset projects . ::punk::mix::commandset::project::collection + #[para]Will result in the command being available as projects + package require overtype + set db_projects [lib::get_projects $glob] + set col1items [lsearch -all -inline -index 0 -subindices $db_projects *] + set col2items [lsearch -all -inline -index 1 -subindices $db_projects *] + set checkouts [lsearch -all -inline -index 2 -subindices $db_projects *] + set col3items [lmap v $checkouts {llength $v}] + + set title1 "Fossil Repo DB" + set widest1 [tcl::mathfunc::max {*}[lmap v [concat [list $title1] $col1items] {punk::strlen $v}]] + set col1 [string repeat " " $widest1] + set title2 "File Name" + set widest2 [tcl::mathfunc::max {*}[lmap v [concat [list $title2] $col2items] {punk::strlen $v}]] + set col2 [string repeat " " $widest2] + set title3 "Checkouts" + set widest3 [tcl::mathfunc::max {*}[lmap v [concat [list $title3] $col3items] {string length $v}]] + set col3 [string repeat " " $widest3] + set tablewidth [expr {$widest1 + 1 + $widest2 + 1 + $widest3}] + + + append msg "[overtype::left $col1 $title1] [overtype::left $col2 $title2] [overtype::left $col3 $title3]" \n + append msg [string repeat "=" $tablewidth] \n + foreach p $col1items n $col2items c $col3items { + append msg "[overtype::left $col1 $p] [overtype::left $col2 $n] [overtype::right $col3 $c]" \n + } + return $msg + #return [list_as_lines [lib::get_projects $glob]] + } + proc detail {{glob {}} args} { + package require overtype + package require textutil + set defaults [dict create\ + -description 0\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- + set opt_description [dict get $opts -description] + # -- --- --- --- --- --- --- + + + set db_projects [lib::get_projects $glob] + set col1_dbfiles [lsearch -all -inline -index 0 -subindices $db_projects *] + set col2items [lsearch -all -inline -index 1 -subindices $db_projects *] + set checkouts [lsearch -all -inline -index 2 -subindices $db_projects *] + set col3items [lmap v $checkouts {llength $v}] + + set col4_pnames [list] + set col5_pcodes [list] + set col6_dupids [list] + set col7_pdescs [list] + set codes [dict create] + set file_idx 0 + foreach dbfile $col1_dbfiles { + set project_name "" + set project_code "" + set project_desc "" + set db_error "" + if {[file exists $dbfile]} { + if {[catch { + sqlite3 dbp $dbfile + dbp eval {select name,value from config where name like 'project-%';} r { + if {$r(name) eq "project-name"} { + set project_name $r(value) + } elseif {$r(name) eq "project-code"} { + set project_code $r(value) + } elseif {$r(name) eq "project-description"} { + set project_desc $r(value) + } + } + } errM]} { + set db_error $errM + } + catch {dbp close} + } else { + set db_error "fossil file $dbfile missing" + } + lappend col4_pnames $project_name + lappend col5_pcodes $project_code + dict lappend codes $project_code $dbfile + lappend col7_pdescs $project_desc + if {$db_error ne ""} { + lset col1_dbfiles $file_idx "[a+ web-red]$dbfile[a]" + } + incr file_idx + } + + set setid 1 + set codeset [dict create] + dict for {code dbs} $codes { + if {[llength $dbs]>1} { + dict set codeset $code setid $setid + dict set codeset $code count [llength $dbs] + dict set codeset $code seen 0 + incr setid + } + } + set dupid 1 + foreach pc $col5_pcodes { + if {[dict exists $codeset $pc]} { + set seen [dict get $codeset $pc seen] + set this_seen [expr {$seen + 1}] + dict set codeset $pc seen $this_seen + lappend col6_dupids "[dict get $codeset $pc setid].${this_seen}/[dict get $codeset $pc count]" + } else { + lappend col6_dupids "" + } + } + + set title1 "Fossil Repo DB" + set widest1 [tcl::mathfunc::max {*}[lmap v [concat [list $title1] $col1_dbfiles] {punk::strlen $v}]] + set col1 [string repeat " " $widest1] + set title2 "File Name" + set widest2 [tcl::mathfunc::max {*}[lmap v [concat [list $title2] $col2items] {punk::strlen $v}]] + set col2 [string repeat " " $widest2] + set title3 "Checkouts" + set widest3 [tcl::mathfunc::max {*}[lmap v [concat [list $title3] $col3items] {string length $v}]] + set col3 [string repeat " " $widest3] + set title4 "Project Name" + set widest4 [tcl::mathfunc::max {*}[lmap v [concat [list $title4] $col4_pnames] {string length $v}]] + set col4 [string repeat " " $widest4] + set title5 "Project Code" + set widest5 [tcl::mathfunc::max {*}[lmap v [concat [list $title5] $col5_pcodes] {string length $v}]] + set col5 [string repeat " " $widest5] + set title6 "Dup" + set widest6 [tcl::mathfunc::max {*}[lmap v [concat [list $title6] $col6_dupids] {string length $v}]] + set col6 [string repeat " " $widest6] + set title7 "Description" + #set widest7 [tcl::mathfunc::max {*}[lmap v [concat [list $title4] $col7_pdescs] {string length $v}]] + set widest7 35 + set col7 [string repeat " " $widest7] + + + set tablewidth [expr {$widest1 + 1 + $widest2 + 1 + $widest3 +1 + $widest4 + 1 + $widest5 + 1 + $widest6}] + + append msg "[overtype::left $col1 $title1] [overtype::left $col2 $title2] [overtype::left $col3 $title3]\ + [overtype::left $col4 $title4] [overtype::left $col5 $title5] [overtype::left $col6 $title6]" + if {!$opt_description} { + append msg \n + } else { + append msg "[overtype::left $col7 $title7]" \n + set tablewidth [expr {$tablewidth + 1 + $widest7}] + } + + append msg [string repeat "=" $tablewidth] \n + foreach p $col1_dbfiles n $col2items c $col3items pn $col4_pnames pc $col5_pcodes dup $col6_dupids desc $col7_pdescs { + set desclines [split [textutil::adjust $desc -length $widest7] \n] + set desc1 [lindex $desclines 0] + append msg "[overtype::left $col1 $p] [overtype::left $col2 $n] [overtype::right $col3 $c]\ + [overtype::left $col4 $pn] [overtype::left $col5 $pc] [overtype::left $col6 $dup]" + if {!$opt_description} { + append msg \n + } else { + append msg " [overtype::left $col7 $desc1]" \n + foreach dline [lrange $desclines 1 end] { + append msg "$col1 $col2 $col3 $col4 $col5 $col6 [overtype::left $col7 $dline]" \n + } + } + } + return $msg + #return [list_as_lines [lib::get_projects $glob]] + } + proc cd {{glob {}} args} { + dict set args -cd 1 + work $glob {*}$args + } + proc work {{glob {}} args} { + package require sqlite3 + set db_projects [lib::get_projects $glob] + if {[llength $db_projects] == 0} { + puts stderr "::punk::mix::commandset::project::work No Repo DB name matches found for '$glob'" + return "" + } + #list of lists of the form: + #{fosdb fname workdirlist} + set defaults [dict create\ + -cd 0\ + -detail "\uFFFF"\ + ] + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- + set opt_cd [dict get $opts -cd] + # -- --- --- --- --- --- --- + set opt_detail [dict get $opts -detail] + set opt_detail_explicit_zero 1 ;#default assumption only + if {$opt_detail eq "\uFFFF"} { + set opt_detail_explicit_zero 0 + set opt_detail 0; #default + } + # -- --- --- --- --- --- --- + set workdir_dict [dict create] + set all_workdirs [list] + foreach pinfo $db_projects { + lassign $pinfo fosdb name workdirs + foreach wdir $workdirs { + dict set workdir_dict $wdir $pinfo + lappend all_workdirs $wdir + } + } + set col_rowids [list] + set workdirs [lsort -index 0 $all_workdirs] + set col_dupids [list] + set col_fnames [list] + set col_pnames [list] + set col_pcodes [list] + set col_dupids [list] + + set fosdb_count [dict create] + set fosdb_dupset [dict create] + set fosdb_cache [dict create] + set dupset 0 + set rowid 1 + foreach wd $workdirs { + set wdinfo [dict get $workdir_dict $wd] + lassign $wdinfo fosdb nm siblingworkdirs + dict incr fosdb_count $fosdb + set dbcount [dict get $fosdb_count $fosdb] + if {[llength $siblingworkdirs] > 1} { + if {![dict exists $fosdb_dupset $fosdb]} { + #first time this multi-checkout fosdb seen + dict set fosdb_dupset $fosdb [incr dupset] + } + set dupid "[dict get $fosdb_dupset $fosdb].$dbcount/[llength $siblingworkdirs]" + } else { + set dupid "" + } + if {$dbcount == 1} { + set pname "" + set pcode "" + if {[file exists $fosdb]} { + if {[catch { + sqlite3 fdb $fosdb + set pname [lindex [fdb eval {select value from config where name = 'project-name'}] 0] + set pcode [lindex [fdb eval {select value from config where name = 'project-code'}] 0] + fdb close + dict set fosdb_cache $fosdb [list name $pname code $pcode] + } errM]} { + puts stderr "!!! problem with fossil db: $fosdb when examining workdir $wd" + puts stderr "!!! error: $errM" + } + } else { + puts stderr "!!! missing fossil db $fosdb" + } + } else { + set info [dict get $fosdb_cache $fosdb] + lassign $info _name pname _code pcode + } + lappend col_rowids $rowid + lappend col_fnames $nm + lappend col_dupids $dupid + lappend col_pnames $pname + lappend col_pcodes [string range $pcode 0 9] + incr rowid + } + + set col_states [list] + set state_title "" + #if only one set of fossil checkouts in the resultset and opt_detail is 0 and not explicit - retrieve workingdir state for each co + if {([llength [dict keys $fosdb_cache]] == 1)} { + if {!$opt_detail_explicit_zero} { + set opt_detail 1 + } + puts stderr "Result is from a single repo db [dict keys $fosdb_cache]" + } + if {$opt_detail} { + if {!$opt_detail_explicit_zero} { + set detailmsg "Use -detail 0 to omit file state" + } else { + set detailmsg "" + } + puts stderr "Gathering file state for [llength $workdirs] checkout folder(s). $detailmsg" + set c_rev [list] + set c_rev_iso [list] + set c_unchanged [list] + set c_changed [list] + set c_new [list] + set c_missing [list] + set c_extra [list] + foreach wd $workdirs { + set wd_state [punk::repo::workingdir_state $wd] + set state_dict [punk::repo::workingdir_state_summary_dict $wd_state] + lappend c_rev [string range [dict get $state_dict revision] 0 9] + lappend c_rev_iso [dict get $state_dict revision_iso8601] + lappend c_unchanged [dict get $state_dict unchanged] + lappend c_changed [dict get $state_dict changed] + lappend c_new [dict get $state_dict new] + lappend c_missing [dict get $state_dict missing] + lappend c_extra [dict get $state_dict extra] + puts -nonewline stderr "." + } + puts -nonewline stderr \n + set t0 "Revision" + set w0 [tcl::mathfunc::max {*}[lmap v [concat [list $t0] $c_rev] {string length $v}]] + set c0 [string repeat " " $w0] + set t0b "Revision iso8601" + set w0b [tcl::mathfunc::max {*}[lmap v [concat [list $t0] $c_rev_iso] {string length $v}]] + set c0b [string repeat " " $w0b] + set t1 "Unch" + set w1 [tcl::mathfunc::max {*}[lmap v [concat [list $t1] $c_unchanged] {string length $v}]] + set c1 [string repeat " " $w1] + set t2 "Chgd" + set w2 [tcl::mathfunc::max {*}[lmap v [concat [list $t2] $c_changed] {string length $v}]] + set c2 [string repeat " " $w2] + set t3 "New" + set w3 [tcl::mathfunc::max {*}[lmap v [concat [list $t3] $c_new] {string length $v}]] + set c3 [string repeat " " $w3] + set t4 "Miss" + set w4 [tcl::mathfunc::max {*}[lmap v [concat [list $t4] $c_missing] {string length $v}]] + set c4 [string repeat " " $w4] + set t5 "Extr" + set w5 [tcl::mathfunc::max {*}[lmap v [concat [list $t5] $c_extra] {string length $v}]] + set c5 [string repeat " " $w5] + + set state_title "[overtype::left $c0 $t0] [overtype::left $c0b $t0b] [overtype::right $c1 $t1] [overtype::right $c2 $t2] [overtype::right $c3 $t3] [overtype::right $c4 $t4] [overtype::right $c5 $t5]" + foreach r $c_rev iso $c_rev_iso u $c_unchanged c $c_changed n $c_new m $c_missing e $c_extra { + lappend col_states "[overtype::left $c0 $r] [overtype::left $c0b $iso] [overtype::right $c1 $u] [overtype::right $c2 $c] [overtype::right $c3 $n] [overtype::right $c4 $m] [overtype::right $c5 $e]" + } + } + + set msg "" + if {$opt_cd} { + set title0 "CD" + } else { + set title0 "" + } + set widest0 [tcl::mathfunc::max {*}[lmap v [concat [list $title0] $col_rowids] {punk::strlen $v}]] + set col0 [string repeat " " $widest0] + set title1 "Checkout dir" + set widest1 [tcl::mathfunc::max {*}[lmap v [concat [list $title1] $workdirs] {punk::strlen $v}]] + set col1 [string repeat " " $widest1] + set title2 "Repo DB name" + set widest2 [tcl::mathfunc::max {*}[lmap v [concat [list $title2] $col_fnames] {string length $v}]] + set col2 [string repeat " " $widest2] + set title3 "CO dup" + set widest3 [tcl::mathfunc::max {*}[lmap v [concat [list $title3] $col_dupids] {string length $v}]] + set col3 [string repeat " " $widest3] + set title4 "Project Name" + set widest4 [tcl::mathfunc::max {*}[lmap v [concat [list $title4] $col_pnames] {string length $v}]] + set col4 [string repeat " " $widest4] + set title5 "Project Code" + set widest5 [tcl::mathfunc::max {*}[lmap v [concat [list $title5] $col_pcodes] {string length $v}]] + set col5 [string repeat " " $widest5] + + set tablewidth [expr {$widest0 + 1 + $widest1 + 1 + $widest2 + 1 + $widest3 +1 + $widest4 + 1 + $widest5}] + append msg "[overtype::right $col0 $title0] [overtype::left $col1 $title1] [overtype::left $col2 $title2] [overtype::left $col3 $title3] [overtype::left $col4 $title4] [overtype::left $col5 $title5]" + + if {[llength $col_states]} { + set title6 $state_title + set widest6 [tcl::mathfunc::max {*}[lmap v [concat [list $title6] $col_states] {string length $v}]] + set col6 [string repeat " " $widest6] + incr tablewidth [expr {$widest6 + 1}] + append msg " [overtype::left $col6 $title6]" \n + } else { + append msg \n + } + append msg [string repeat "=" $tablewidth] \n + + if {[llength $col_states]} { + foreach row $col_rowids wd $workdirs db $col_fnames dup $col_dupids pname $col_pnames pcode $col_pcodes s $col_states { + append msg "[overtype::right $col0 $row] [overtype::left $col1 $wd] [overtype::left $col2 $db] [overtype::right $col3 $dup] [overtype::left $col4 $pname] [overtype::left $col5 $pcode] [overtype::left $col6 $s]" \n + } + } else { + foreach row $col_rowids wd $workdirs db $col_fnames dup $col_dupids pname $col_pnames pcode $col_pcodes { + append msg "[overtype::right $col0 $row] [overtype::left $col1 $wd] [overtype::left $col2 $db] [overtype::right $col3 $dup] [overtype::left $col4 $pname] [overtype::left $col5 $pcode]" \n + } + } + set numrows [llength $col_rowids] + if {$opt_cd && $numrows >= 1} { + puts stdout $msg + if {$numrows == 1} { + set workingdir [lindex $workdirs 0] + puts stdout "1 result. Changing dir to $workingdir" + if {[file exists $workingdir]} { + ::cd $workingdir + return $workingdir + } else { + puts stderr "path $workingdir doesn't appear to exist" + return [pwd] + } + } else { + set answer [util::askuser "Change directory to working folder - select a number from 1 to [llength $col_rowids] or any other key to cancel."] + if {[string trim $answer] in $col_rowids} { + set index [expr {$answer - 1}] + set workingdir [lindex $workdirs $index] + ::cd $workingdir + puts stdout [deck stat] + return $workingdir + } + } + } + return $msg + } + #*** !doctools + #[list_end] [comment {-- end collection namespace definitions --}] + } + + namespace eval lib { + proc template_tag {tagname} { + #todo - support different tagwrappers - it shouldn't be so likely to collide with common code idioms etc. + #we need to detect presence of tags intended for punk::mix system + #consider using punk::cap to enable multiple template-substitution providers with their own set of tagnames and/or tag wrappers, where substitution providers are all run + return [string cat % $tagname %] + } + #get project info only by opening the central confg-db + #(will not have proper project-name etc) + proc get_projects {{globlist {}} args} { + if {![llength $globlist]} { + set globlist [list *] + } + set fossil_prog [auto_execok fossil] + + set configdb [punk::repo::fossil_get_configdb] + + package require sqlite3 + ::sqlite3 fosconf $configdb + #set testresult [fosconf eval {select name,value from global_config;}] + #puts stderr $testresult + set project_repos [fosconf eval {select name from global_config where name like 'repo:%';}] + set paths_and_names [list] + foreach pr $project_repos { + set path [string trim [string range $pr 5 end]] + set nm [file rootname [file tail $path]] + set ckouts [fosconf eval {select name from global_config where value = $path;}] + set checkout_paths [list] + #strip "ckout:" + foreach ck $ckouts { + lappend checkout_paths [string trim [string range $ck 6 end]] + } + lappend paths_and_names [list $path $nm $checkout_paths] + } + set filtered_list [list] + foreach glob $globlist { + set matches [lsearch -all -inline -index 1 $paths_and_names $glob] + foreach m $matches { + if {$m ni $filtered_list} { + lappend filtered_list $m + } + } + } + set projects [lsort -index 1 $filtered_list] + return $projects + } + + } + + + + + +} + + +#*** !doctools +#[manpage_end] + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::project [namespace eval punk::mix::commandset::project { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/repo-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/repo-0.1.0.tm new file mode 100644 index 00000000..2b3ca282 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/repo-0.1.0.tm @@ -0,0 +1,420 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::repo 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::repo { + namespace export * + proc tickets {{project ""}} { + set result "" + if {[string length $project]} { + puts stderr "project status unimplemented" + return + } + set active_dir [pwd] + append result "Retrieving top 10 tickets only (for more, use fossil timeline -n -t t)" \n + append result [exec fossil timeline -n 10 -t t] + + return $result + } + + proc fossilize { args} { + #check if project already managed by fossil.. initialise and check in if not. + puts stderr "unimplemented" + } + + proc unfossilize {projectname args} { + #remove/archive .fossil + puts stderr "unimplemented" + } + proc state {} { + set result "" + set repopaths [punk::repo::find_repos [pwd]] + set repos [dict get $repopaths repos] + if {![llength $repos]} { + append result [dict get $repopaths warnings] + } else { + append result [dict get $repopaths warnings] + lassign [lindex $repos 0] repopath repotypes + if {"fossil" in $repotypes} { + append result \n "Fossil repo based at $repopath" + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes fossil] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + if {"git" in $repotypes} { + append result \n "Git repo based at $repopath" + set repostate [punk::repo::workingdir_state $repopath -repopaths $repopaths -repotypes git] + append result \n [punk::repo::workingdir_state_summary $repostate] + } + } + return $result + } + proc fossil-move-repository {{path ""}} { + set searchbase [pwd] + set projectinfo [punk::repo::find_repos $searchbase] + set projectbase [dict get $projectinfo closest] + set is_fossil [expr {"fossil" in [dict get $projectinfo closest_types]}] + if {[catch { + package require sqlite3 + } errM]} { + puts stderr "sqlite3 package failed to load" + puts stderr "Try using 'fossil test-move-repository ' from within an open checkout folder, or ensure that the Tcl sqlite3 package is available." + return + } + set ansiprompt [a+ green bold] + set ansiwarn [a+ red bold] + set ansihighlight [a+ cyan bold] + set ansireset [a] + + set in_checkout 0 + set is_checkout_relink 0; #whether we are attempting to link a checkout that has lost its repo + #we may also encounter a different kind of relink candidate - other checkouts of the same repo that we examine and find don't point back. + if {$projectbase eq "" || !$is_fossil} { + set repodbs [glob -dir $searchbase -type f -tail *.fossil] + if {![llength $repodbs]} { + puts stderr "Current directory does not seem to be directly below a fossil checkout, and no .fossil files found" + puts stderr "Please move to a folder containing the .fossil repository database to move, or to a folder directly within a fossil checkout (and with no intermediate git/fossil repos)" + return + } + set choice_files [list] + set i 1 + set menu_message "" + append menu_message "${ansiprompt}Select the number of the fossil repo db to potentially move (confirmation will be requested before any action is taken)${ansireset}" \n + foreach db $repodbs { + sqlite3 dbinfo [file join $searchbase $db] + set ckouts [dbinfo eval {select name from config where name like 'ckout:%'}] + dbinfo close + lappend choice_files [list index $i repofile $db checkouts [llength $ckouts]] + append menu_message "$i $db checkouts: [llength $ckouts]" \n + incr i + } + puts stdout $menu_message + set max [llength $choice_files] + if {$max == 1} { + set rangemsg "the number 1" + } else { + set rangemsg "a number from 1 to $max" + } + set answer [punk::repo::askuser "${ansiprompt}Enter $rangemsg to select a .fossil repository database to show details and potentially move. (or N to abort)${ansireset}"] + if {![string is integer -strict $answer]} { + puts stderr "Aborting" + return + } + + set index [expr {int($answer) -1}] + if {$index >= 0 && $index <= $max-1} { + set repo_file_choice [lindex $choice_files $index] + set repo_file [dict get $repo_file_choice repofile] + set repo_file [file join $searchbase $repo_file] + puts stdout "Selected fossil repo database file: $repo_file" + } else { + puts stderr " No menu number matched - aborting." + return + } + } else { + if {[file exists $projectbase/_FOSSIL_]} { + set cdbfile [file join $projectbase/_FOSSIL_] + } elseif {[file exists $projectbase/.fslckout]} { + set cdbfile [file join $projectbase/.fslckout] + } else { + puts stderr "No checkout database (_FOSSIL_ or .fslckout) found in nearest repository folder $projectbase (looked upwards from $searchbase)" + puts stderr "Unable to locate repository databases for potential move. Please move to a checkout folder or a folder containing .fossil repositories" + puts stderr "If run from a location where repositories are found, fossil-move-repository will give you the option to select a repository or cancel the operation" + return + } + set in_checkout 1 + sqlite3 cdb $cdbfile + set repo_file [cdb eval {select value from vvar where name='repository'}] + cdb close + if {[string length [string trim $repo_file]] && [file pathtype $repo_file] eq "relative"} { + set repo_file [file join $projectbase $repo_file] + } + if {![string length [string trim $repo_file]] || ![file exists $repo_file]} { + puts stderr "${ansiwarn}Checkout at $projectbase points to repository '$repo_file' - but it doesn't seem to exist${ansireset}" + set answer [punk::repo::askuser "${ansiprompt}Do you want to link this to an existing repository file? (Y|N)${ansireset}"] + if {[string match y* [string tolower $answer]]} { + set is_checkout_relink 1 + } else { + puts stderr "Aborting - Unable to link this checkout dir to a repository database file" + return + } + } + } + + set pname [file rootname [file tail $repo_file]] + set full_path_repo_file [file join $searchbase $repo_file] + if {[file isfile $full_path_repo_file]} { + sqlite3 dbinfo [file join $searchbase $repo_file] + set ckouts [dbinfo eval {select name from config where name like 'ckout:%'}] + dbinfo close + if {![llength $ckouts]} { + puts stdout "Repository db at [file join $searchbase $repo_file] appears to have no open checkouts" + } else { + puts stdout "Repository db at [file join $searchbase $repo_file] appears to have [llength $ckouts] open checkouts:" + foreach ck $ckouts { + puts stdout [string range $ck 6 end] + } + } + } else { + puts stderr "${ansiwarn}Missing repository db at $full_path_repo_file${ansireset}" + } + puts stdout "${ansihighlight}Report for all projects with repository file name $pname${ansireset}" + puts stdout [punk::mix::commandset::project::collection::detail $pname] + puts stdout [punk::mix::commandset::project::collection::work $pname -detail 1] + + #todo + #ask user if they want to select a different pname + set wantrenameprompt "${ansiprompt}Would you like to rename the .fossil file? (Y|N)${ansireset}" + append wantrenameprompt \n "${ansiprompt}.eg change $pname.fossil to something else such as ${pname}_new.fossil${ansireset}" + set answer [punk::repo::askuser $wantrenameprompt] + set pname2 $pname + if {[string match y* [string tolower $answer]]} { + set dorenameprompt "${ansiprompt}Enter the new name and hit enter. (Just an alphanumeric name (possibly with dots/dashes/underscores) without .fossil and without any path)${ansireset}" + set namechoice [punk::repo::askuser $dorenameprompt] + if {[string length $namechoice]} { + set permittedmap [list . "" - "" _ ""] + if {[string is alnum -strict [string map $permittedmap $namechoice]]} { + set pname2 $namechoice + } else { + puts stderr "Entered name was invalid. Must be numbers,letters,underscore,dot,dash" + } + } + puts stdout "Continuing with name $pname2 - cancel at next prompt if this is incorrect" + } + + set target_repodb_folder [punk::repo::fossil_get_repository_folder_for_project $pname2 -parentfolder $searchbase -askpath 1] + #target_repodb_folder might be same as source folder - check for same file if name wasn't changed? + if {![string length $target_repodb_folder]} { + puts stderr "No usable repository database folder selected for $pname2.fossil file" + return + } + + set existing_target_repofile 0 + if {[file exists $target_repodb_folder/$pname2.fossil]} { + set existing_target_repofile 1 + puts stdout "${ansiwarn}NOTICE: $target_repodb_folder/$pname2.fossil already exists${ansireset}" + if {!$is_checkout_relink} { + set finalquestion "${ansiprompt}Are you sure you want to switch the repository $repo_file for the open checkout(s) to the existing file $target_repodb_folder/$pname2.fossil? (Y|N)${ansireset}" + } else { + set finalquestion "${ansiprompt}Are you sure you want to attempt to linke the repository (previously linked with '$repo_file') for the open checkout(s) to the existing file $target_repodb_folder/$pname2.fossil? (Y|N)${ansireset}" + } + } else { + if {!$is_checkout_relink} { + set finalquestion "${ansiprompt}Proceed to move repository $repo_file to the new file $target_repodb_folder/$pname2.fossil? Y|N${ansireset}" + } else { + set finalquestion "${ansiprompt}Proceed to attempt link for missing repo db $repo_file to the new file $target_repodb_folder/$pname2.fossil? Y|N${ansireset}" + } + } + set line "${ansiwarn}[string repeat - [string length $finalquestion]]${ansireset}" + set finalprompt $line\n + append finalprompt $finalquestion \n + append finalprompt $line \n + + set answer [punk::repo::askuser $finalprompt] + if {[string match y* [string tolower $answer]]} { + if {!$existing_target_repofile && !$is_checkout_relink} { + if {[catch { + file copy $repo_file $target_repodb_folder/$pname2.fossil + } errM]} { + puts stderr "${ansiwarn}FAILED to copy $repo_file to $target_repodb_folder/$pname2.fossil - aborting${ansireset}" + puts stderr "Error message was:\n $errM" + return + } + if {$in_checkout} { + #in_checkout means we can assume projectbase var exists + #there may be other checkouts on the old repo + #if so, we will remind the user of their existence + if {[catch {exec fossil test-move-repository $target_repodb_folder/$pname2.fossil} errM]} { + puts stderr "${ansiwarn}The fossil test-move-repository command appears to have failed${ansireset}" + puts stderr "$errM" + } else { + + sqlite3 oldrepo $repo_file + set ckouts [oldrepo eval {select name from config where name like 'ckout:%'}] + set pcode [oldrepo eval {select value from config where name = 'project-code'}] + oldrepo close + if {[string length $pcode] < 20} { + puts stderr "WARNING: Failed to get project-code from repo db $repo_file" + } + set other_checkouts [list] + set norm_projectbase [file normalize $projectbase] + foreach ck $ckouts { + set ckfolder [string trim [string range $ck 6 end]] + if {![file isdirectory $ckfolder]} { + #as the process was launched within a checkout - we won't bother user with reports of non-existant other checkouts + continue + } + if {[file normalize $ckfolder] ne $norm_projectbase} { + lappend other_checkouts $ckfolder + } + } + if {[llength $other_checkouts]} { + puts stderr "${ansiwarn}Other checkouts of $repo_file that may need consideration${ansireset}" + foreach other $other_checkouts { + puts stdout $other + } + } + } + } else { + #we aren't in a checkout - moving a repo to a new db location and/or name so there's no reason to prefer one checkout over another.. presumably the user either wants to move them all - or be asked.. + sqlite3 oldrepo $repo_file + set ckouts [oldrepo eval {select name from config where name like 'ckout:%'}] + oldrepo close + if {[llength $ckouts] > 1} { + puts stdout "There are [llength $ckouts] checkouts for the repository you are moving" + puts stdout "You will be asked for each checkout if you want to adjust it to point to $target_repodb_folder/$pname2.folder" + } + set original_cwd [pwd] + foreach ck $ckouts { + set ckfolder [string trim [string range $ck 6 end]] + if {![file isdirectory $ckfolder]} { + puts stderr "old repo shows a checkout at $ckfolder - but it doesn't seem to exist. Ignoring" + continue + } + cd $ckfolder + puts stdout [exec fossil info] + puts stdout [state] + set answer [punk::repo::askuser "${ansiprompt}Do you want to point this checkout to $target_repodb_folder/$pname2.folder? (Y|N) Q to stop processing checkouts${ansireset}"] + if {[string match q* [string tolower $answer]]} { + puts stderr "User aborting loop" + break + } + if {[string match y* [string tolower $answer]]} { + if {[catch {exec fossil test-move-repository $target_repodb_folder/$pname2.fossil} moveresult]} { + puts stderr "${ansiwarn}The fossil test-move-repository command appears to have failed${ansireset}" + puts stderr "$moveresult" + } else { + puts stdout "OK - move performed with result:" + puts stdout $moveresult + } + } + } + cd $original_cwd + + } + } else { + if {$is_checkout_relink} { + #relinking a lost checkout to an existing repo.. we should probably check it's other checkouts and see if they point back + if {[catch {exec fossil test-move-repository $target_repodb_folder/$pname2.fossil} errM]} { + puts stderr "${ansiwarn}The fossil test-move-repository command appears to have failed${ansireset}" + puts stderr "$errM" + } + } else { + if {$in_checkout} { + if {[catch {exec fossil test-move-repository $target_repodb_folder/$pname2.fossil} errM]} { + puts stderr "${ansiwarn}The fossil test-move-repository command appears to have failed${ansireset}" + puts stderr "$errM" + } + } else { + #not in checkout - we're wanting what pointed to one repo to point to a different existing one - presumably for all checkouts + sqlite3 newrepo $target_repodb_folder/$pname2.fossil + set newpname [newrepo eval {select value from config where name = 'project-name'}] + set newpcode [newrepo eval {select value from config where name = 'project-code'}] + set newckouts [newrepo eval {select name from config where name like 'ckout:%'}] + newrepo close + + sqlite3 oldrepo $repo_file + set oldpname [oldrepo eval {select value from config where name = 'project-name'}] + set oldpcode [oldrepo eval {select value from config where name = 'project-code'}] + set oldckouts [oldrepo eval {select name from config where name like 'ckout:%'}] + oldrepo close + if {$newpname eq $oldpname} { + set ansi_newpname [a+ green bold]$newpname[a] + set ansi_oldpname [a+ green bold]$oldpname[a] + } else { + set ansi_newpname [a+ cyan bold]$newpname[a] + set ansi_oldpname [a+ red bold]$oldpname[a] + } + if {$newpcode eq $oldpcode} { + set ansi_newpcode [a+ green bold]$newpcode[a] + set ansi_oldpcode [a+ green bold]$oldpcode[a] + } else { + set ansi_newpcode [a+ cyan bold]$newpcode[a] + set ansi_oldpcode [a+ red bold]$oldpcode[a] + } + puts stdout "Target repository $target_repodb_folder/$pname2.fossil has project-name: $ansi_newpname and [llength $newckouts] existing checkouts" + puts stdout "Target project code: $ansi_newpcode" + puts stdout "Source repository $repo_file has project-name: $ansi_oldpname and [llength $oldckouts] existing checkouts" + puts stdout "Source project code: $ansi_oldpcode" + if {[llength $oldckouts] > 1} { + puts stdout "You will be asked for each checkout if you want to adjust it to point to $target_repodb_folder/$pname2.folder" + } + set original_cwd [pwd] + foreach ck $oldckouts { + set ckfolder [string trim [string range $ck 6 end]] + if {![file isdirectory $ckfolder]} { + puts stderr "old repo shows a checkout at $ckfolder - but it doesn't seem to exist. Ignoring" + continue + } + cd $ckfolder + puts stdout [exec fossil info] + puts stdout [state] + set answer [punk::repo::askuser "${ansiprompt}Do you want to point this checkout to $target_repodb_folder/$pname2.folder? (Y|N) Q to stop processing checkouts${ansireset}"] + if {[string match q* [string tolower $answer]]} { + puts stderr "User aborting loop" + break + } + if {[string match y* [string tolower $answer]]} { + if {[catch {exec fossil test-move-repository $target_repodb_folder/$pname2.fossil} moveresult]} { + puts stderr "${ansiwarn}The fossil test-move-repository command appears to have failed${ansireset}" + puts stderr "$moveresult" + } else { + puts stdout "OK - move performed with result:" + puts stdout $moveresult + } + } + } + cd $original_cwd + + } + } + } + + + puts stdout "-done-" + } else { + puts stdout "-cancelled by user-" + } + + } +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::repo [namespace eval punk::mix::commandset::repo { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm new file mode 100644 index 00000000..c61db428 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/commandset/scriptwrap-0.1.0.tm @@ -0,0 +1,1673 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::commandset::scriptwrap 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +package require punk::mix +package require punk::mix::base +package require punk::fileline + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::commandset::scriptwrap { + namespace export * + + namespace eval fileline { + namespace import ::punk::fileline::lib::* + namespace import ::punk::fileline::class::* + } + + proc templates {args} { + package require overtype + package require textblock + set tdict_low_to_high [templates_dict {*}$args] + #convert to screen order - with higher priority at the top + set tdict [dict create] + foreach k [lreverse [dict keys $tdict_low_to_high]] { + dict set tdict $k [dict get $tdict_low_to_high $k] + } + + #set pathinfolist [dict values $tdict] + set names [dict keys $tdict] + + #set paths [lsearch -all -inline -index 1 -subindices $pathinfolist *]; #first key of templates_dict is path + set paths [list] + set pathtypes [list] + dict for {nm tinfo} $tdict { + lappend paths [dict get $tinfo path] + lappend pathtypes [dict get $tinfo sourceinfo pathtype] + } + + package require textblock + set title(name) "Template Name" + set widest(name) [tcl::mathfunc::max {*}[lmap v [concat [list $title(name)] $names] {string length $v}]] + set col(name) [string repeat " " $widest(name)] + + set title(pathtype) "[a+ green]Path\nType[a]" + set widest(pathtype) [tcl::mathfunc::max {*}[lmap v [concat [list $title(pathtype)] $pathtypes] {textblock::width $v}]] + set col(pathtype) [string repeat " " $widest(pathtype)] + + set title(path) "Path" + set widest(path) [tcl::mathfunc::max {*}[lmap v [concat [list $title(path)] $paths] {string length $v}]] + set col(path) [string repeat " " $widest(path)] + + + + + set tablewidth [expr {$widest(name) + 1 + $widest(pathtype) + $widest(path)}] + set table "" + append table [string repeat - $tablewidth] \n + append table [textblock::join -- [overtype::left $col(name) $title(name)] " " [overtype::left $col(pathtype) $title(pathtype)] " " [overtype::left $col(path) $title(path)]] \n + append table [string repeat - $tablewidth] \n + + foreach n $names pt $pathtypes p $paths { + append table "[overtype::left $col(name) $n] [overtype::left $col(pathtype) $pt] [overtype::left $col(path) $p]" \n + } + + + return $table + } + proc templates_dict {args} { + package require punk::cap + if {[punk::cap::capability_has_handler punk.templates]} { + return [punk::cap::call_handler punk.templates get_itemdict_scriptappwrappers {*}$args] + } else { + put stderr "commandset::scriptwrap::templates_dict WARNING - no handler available for the 'punk.templates' capability - template providers will be unable to provide template locations" + } + return + } + + + #A batch file with unix line-endings is sensitive to label positioning. + #batch file with windows crlf line endings can exhibit this problem - but probably only if specifically crafted with long lines deliberately designed to trigger it. + #see: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 (Call and goto may fail when the batch file has Unix line endings) + #The windows batch file scanner appears to parse in 512 Byte chunks. + #If a label following a call/goto is at a position spanning a 512 byte block as counted from the call/goto site (callsite assumed to be EOL - works for basic cases at least) then the label won't be found. + #A label preceding a call/goto site can't span a 512 byte boundary as counted from the beginning of the file + #checkfile produces warnings and errors in ansi-coloured form (both to stdout and a summary in the return value) + #The script should then be adjusted with comments and/or whitespace and checkfile should be re-run to confirm there are no new boundary-spanning labels. + #checkfile needs to be run even on previously tested scriptwrapper templates because the final :exit label is beyond the payloads and so could span a 512 Byte block + #It is more likely to catch issues if adjustments are made to the initial batch-script code in a template. + # + #cmd allows labels at call sites to span lines with line continuation character ^ + #target labels can't span lines with ^ - cmd doesn't recognise them (They possibly do span in a way - but but the newlines may be included in the label - so they may be hard/impossible to call). + #Note that we can't filter obviously non-batch-script lines before processing - as the way batch label-scanning works it scans in chunks of 512 bytes so all lines are relevant. + #This means label-like things could be incorrectly found in other script data - that's partly the point of this check + #Note that we can't filter obviously non-batch-script lines before processing - as the way batch label-scanning works it scans in chunks of 512 bytes so all lines are relevant. + #This means label-like things could be incorrectly found in other script data - that's partly the point of this check. + proc checkfile {filepath args} { + if {![file exists $filepath]} { + error "punk::mix::commandset:scriptwrap error cannot find file '$filepath'" + } + set crlf_lf_replacements [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message + # -ignore_rems 1 allows testing of alignment state if rems were stripped - todo - lf/crlf-preserving rem strip function + set opts [dict create\ + -ignore_rems 0\ + -substitutionmap {}\ + -crlf_lf_replacements $crlf_lf_replacements\ + ] + foreach {k v} $args { + switch -- $k { + -ignore_rems - -substitutionmap - -crlf_lf_replacements { + dict set opts $k $v + } + default { + error "checkfile error - unknown option '$k'. Known options: [dict keys $opts]" + } + } + } + # -- --- --- --- --- --- --- + set opt_ignore_rems [dict get $opts -ignore_rems] + set opt_substitutionmap [dict get $opts -substitutionmap] + set opt_crlf_lf_replacements [dict get $opts -crlf_lf_replacements] + # -- --- --- --- --- --- --- + + # #### load file #### + ##set raw_filedata [fcat -translation binary $filepath] + # - as we may need to look at data beyond a ctrl-z (\x1A) section + set fd [open $filepath r] + fconfigure $fd -translation binary + set raw_filedata [read $fd] + close $fd + # ################### + + + set objFile [fileline::textinfo new $raw_filedata] + + if {$opt_ignore_rems} { + #! todo + error "-ignore_rems unimplemented" + if 0 { + #todo - rebuild a raw_filedata value from the resultant lines + #review. @REM can appear after other commands and an ampersand for example. + # - we are interested in stripping lines with leading REMs + # - need to work out if a REM line with dos line-continuation should + + set data "" + set skipped_rems 0 + foreach ln [split $filedata \n] { + set ln [string trim $ln] + if {[string match @REM* $ln] || [string match REM* $ln] || [string match @rem* $ln] || [string match rem* $ln]} { + #ignore + incr skipped_rems + } else { + append data $ln \n ;#!! + } + } + puts stderr "Skipped $skipped_rems rem lines" + set dsize [string length $data] + } + } else { + set dsize [string length $raw_filedata] + } + + puts stdout "Examining [$objFile chunklen] bytes of file $filepath for cmd script issues." + set le_info [$objFile chunk_le_counts 0 end] + set lf_count [dict get $le_info lf] + set crlf_count [dict get $le_info crlf] + set unterminated_count [dict get $le_info unterminated] + set total_count [expr {$lf_count + $crlf_count + $unterminated_count}] + puts stdout "lines ending in lf : $lf_count" + puts stdout "lines ending in crlf : $crlf_count" + puts stdout "unterminated lines : $unterminated_count" ;#commonly 1 for trailing data at end of file. More than one is likely to be an error - or perhaps a policy plugin with different concept of lines? + puts stdout "crlf + lf + unterminated: $total_count" + puts stdout "line count : [$objFile linecount]" + if {$total_count != [$objFile linecount]} { + puts stdout "[a+ yellow bold]WARNING: Linecount mismatch with line-endings - seems fishy[a]" + } + if {$unterminated_count > 1} { + puts stdout "[a+ yellow bold]WARNING: More than one unterminated line reported - seems fishy[a]" + } + puts "Checking line based labels and 512 byte boundaries from call sites for possible labels and code execution points." + set line_count [$objFile linecount] + set callid 0 ;#id for callsite and objects created + set file_offset 0 + set error_labels [list] + set warning_labels [list] + set call_labels_found [dict create] + set target_labels_found [dict create] + set possible_target_labels_found [dict create] + set warning_target_labels_found [dict create] + for {set callingline_index 0} {$callingline_index < $line_count} {incr callingline_index} { + set callingline_info [$objFile lineinfo $callingline_index] + set callingline_payload [dict get $callingline_info payload] + set callingline_len [dict get $callingline_info linelen] + set callingline_num [expr {$callingline_index + 1}] + + set callposn -1 + set trimln [string trim $callingline_payload] + + #if {[string match "rem *" $trimln] || [string match "@rem *" $trimln] || [string match "REM *" $trimln] || [string match "@REM *" $trimln]} {} + #ignore things that look like a call that are beind a REM + switch -glob -nocase -- $trimln { + "rem *" - "@rem *" { + + } + default { + #todo - better callsite analysis. There can be data between @GOTO or @CALL and : other than just whitespace! + + #todo - allow analysis of colon-less call. May need to check list of internal commands - but what about external ones? + #foreach search_regex [list {(.*\s+|^)(@*call\s*:*)(\S.*)} {(.*\s+|^)(@*CALL\s*:*)(\S.*)} {(.*\s+|^)(@*goto\s*:*)(\S.*)} {(.*\s+|^)(@*GOTO\s*:*)(\S.*)}] {} + foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s*|.*\s+|^)(@*GOTO\s*:)(\S.*)} {(.*\|\|.*)(@*GOTO\s*:)(\S.*)}] { + if {[regexp $search_regex $callingline_payload _m precall call labelplus]} { + #todo further checks to see if it's actually a batch script line + # - - - - work out what cmd.exe considers start of 512B boundaries when scanning from a callsite + #callposn affected by newlines? + #set callposn [expr {$file_offset + [string length $callingline_payload]}] ;#take callposn as end of line .. review - multiline statements? + set callposn [expr {$file_offset + $callingline_len}] + + #Note there are anomalies around target labels in bracketed sections such as IF blocks + #this is bad practice - as it can lead to unbalanced braces - but batch files can still work under cmd.exe with them in some cases + #e.g unbalanced trailing bracket may be ignored. + #A working script with target-labels in braces can fail due to boundary issues we don't detect (callsite for boundary counting may need to be at end of entire multiline if block??) + #For now - just make sure punk templates don't do this - but it would be nice to be able to detect. + + #set callposn $file_offset + #set callposn [expr {$file_offset + [string length $precall]}] + # - - - - + break + } + } + set callsite_labelfound 0 ;#until proven + if {$callposn != -1} { + set callposn_lineindex [lindex [$objFile chunkrange_to_linerange $callposn $callposn] 0] + #the line represented by callposn may actually be beyond the calling_line_index + set labelinfo [batchlib::get_callsite_label $labelplus] + if {[dict get $labelinfo labelfound]} { + set callsite_labelfound 1 + set label [dict get $labelinfo label] + set call_label_record [list label $label line $callingline_num] + dict lappend call_labels_found $label $call_label_record + } else { + puts stderr "[a+ yellow bold]WARNING - apparent callsite $callposn but couldn't verify label[a]" + puts stderr "Line:\n$trimln" + } + } + + #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. + if {$callsite_labelfound} { + puts stdout "[a+ bold cyan]CALLSITE on line $callingline_num ending at byte $callposn[a]" + set callsummary [string range "${call}${labelplus}" 0 100] + if {[string length $callsummary] < [string length ${call}${labelplus}]} { + puts stdout " CALLSITE: $callsummary (truncated to 100 bytes)" + } else { + puts stdout " CALLSITE: '${call}${labelplus}'" + } + puts stdout " [a+ cyan]FULLINE: $callingline_payload[a]" + + + + ################################## + #set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split + #NOTE it is invalid to assume label always terminated by space - pair of % characters (variable substitution) can contain a space without terminating label + #set word1 [lindex $labelpluswords 0] + + + ################################## + + + set labelsize [string length $label] + #scan forward for labels at boundaries + set forward_chunk [$objFile chunk $callposn end] + set forward_chunk_base $callposn ;#name for clarity + + incr callid + set callvar "call-${callid}_fromline-${callingline_num}" + upvar 0 $callvar objForwardScan + set objForwardScan [fileline::textinfo new $forward_chunk] + + + + ################################################################################################################################## + #Forward scan 1 - check at normal line boundaries - and see if collides with a chunk boundary - and if the label is obscured or ok + set dsize [$objForwardScan chunklen] + set num_boundaries [expr {$dsize / 512} ] + puts "scanning $dsize forward bytes in file starting at $forward_chunk_base for label '$label' - num_boundaries: $num_boundaries" + set total_offset $file_offset + set found_forward_label 0 + foreach scanlineinfo [$objForwardScan lineinfolist 0 end] { + set scanline_start [dict get $scanlineinfo start] + set scanline_bytes [dict get $scanlineinfo linelen] + set scanline [dict get $scanlineinfo payload] + + set line_start_global [expr {$forward_chunk_base + $scanline_start}] + set line_index_global [lindex [$objFile chunkrange_to_linerange $line_start_global $line_start_global] 0] + set line_num_global [expr {$line_index_global + 1}] + + set trimscanline [string trim $scanline] + + set found_targetlabel_at_line 0 ;# until disproven + if {[string first : $scanline] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $scanline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + #add to target_labels_found record below + set scan_target_label_record [list label $label line $line_num_global] + set found_targetlabel_at_line 1 + } + } + + if {$found_targetlabel_at_line} { + set scan_target_label_same_line_seen false + if {[dict exists $target_labels_found $label]} { + set thislabel_records [dict get $target_labels_found $label] + foreach previous $thislabel_records { + if {[dict get $previous line] eq $line_num_global} { + set scan_target_label_same_line_seen true + } + } + } + incr found_forward_label + if {!$scan_target_label_same_line_seen} { + set label_posn_in_line [string first : $scanline] + set labelposn [expr {$scanline_start + $label_posn_in_line}] + #we only really care about exactly landing on a boundary or else the next 512 byte boundaries above the labelposn + if {($labelposn % 512) == 0} { + set ubound [expr {($labelposn / 512) * 512}] + } else { + set ubound [expr {(($labelposn / 512)+1) * 512}] + } + set lbound [expr {$ubound - $labelsize}] + if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { + dict set scan_target_label_record error linestart_and_call_offset_bytes + lappend error_labels [list label $label linestart_and_call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $callingline_num] bad_target_line $line_num_global] + puts stdout "[a+ bold red]ERROR: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" + puts [$objForwardScan chunk_boundary_display [dict get $scanlineinfo start] [dict get $scanlineinfo end] 512 -linebase $callposn_lineindex+1 -limit 1] ;#+1 on callposn_linindex to do editor-style linenums + } else { + dict set scan_target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + } + dict lappend target_labels_found $label $scan_target_label_record + } else { + puts stdout "OK - seen label $label on $line_num_global before" + } + } + incr total_offset $scanline_bytes + } + ################################################################################################################################## + + + #todo + #forward scan 2 - check any boundaries missed above because the label isn't at the begining of a line + #these are potentially hidden labels that could activate without requiring the label be at the beginning of a line + #check boundary spans relative to start of this objForwardScan chunk + + #adjust boundary-search by resetting counter each time crlf encountered + set forward_lines [$objForwardScan chunkrange_to_lineinfolist 0 end] + set boundary_positions [list 0] + set scanner_offset 0 + set scanner_position 0 + foreach forwardbline_info $forward_lines { + #review - do we need to check the payload in case we have configured the textinfo object to split the file only on lf - (not true by default) + set forwardbline_len [dict get $forwardbline_info linelen] + set forwardbline_spaninfo [fileline::range_spans_chunk_boundaries [expr {$scanner_position + $scanner_offset}] [expr {$scanner_position + $scanner_offset + $forwardbline_len}] 512] + set forwardbline_boundaries [dict get $forwardbline_spaninfo boundaries] + + foreach b $forwardbline_boundaries { + set relb [expr $b + $scanner_offset] + if {$relb <= [dict get $forwardbline_info end]} { + lappend boundary_positions $relb + } else { + #leave it for the next line - as we may need to adjust offset anyway + break + } + } + if {[dict get $forwardbline_info le] eq "crlf"} { + set scanner_offset [expr {[dict get $forwardbline_info end] - [lindex $boundary_positions end]}] ;#reset on crlf + #puts "+++++ set scanner_offset $scanner_offset" + } + set scanner_position [dict get $forwardbline_info end] + } + set boundary_positions [lsearch -all -not -inline $boundary_positions 0] + if {[llength $boundary_positions]} { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, boundaries (possibly with offsets) to check $boundary_positions[a]" + } else { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, No boundaries to check (generally expected for files with crlf line endings and no extremely long lines)[a]" + } + + + if {[llength $boundary_positions]} { + puts stdout "line $callingline_num scan from call label $label ending at position $callposn. Next Callsite-relative boundary [lindex $boundary_positions 0]" + + for {set i 0} {$i < [llength $boundary_positions]} {incr i} { + set b [lindex $boundary_positions $i] + if {$i < [llength $boundary_positions]-1} { + set nextb [lindex $boundary_positions $i+1] + set top $nextb + } else { + set top end + } + + set forwardbline_infolist [$objForwardScan chunkrange_to_lineinfolist $b $top -show_truncated 1] + set forwardbline_info [lindex $forwardbline_infolist 0] + if {[dict get $forwardbline_info is_truncated]} { + set payload_from_boundary [dict get $forwardbline_info truncated] + } else { + set payload_from_boundary [dict get $forwardbline_info payload] + } + set forwardbline_len [dict get $forwardbline_info linelen] + set forwardbline_index [dict get $forwardbline_info lineindex] + set forwardbline_start [dict get $forwardbline_info start] + set forwardbline_start_global [expr {$forward_chunk_base + $forwardbline_start}] + set forwardbline_index_global [lindex [$objFile chunkrange_to_linerange $forwardbline_start_global $forwardbline_start_global] 0] + set forwardbline_num_global [expr {$forwardbline_index_global + 1}] + + set found_targetlabel_at_boundary 0 + if {[string first : $payload_from_boundary] >= 0} { + #puts stdout "Possible label at boundary $b - testing" + set labelinfo [batchlib::get_target_label_from_line $payload_from_boundary] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + incr found_forward_label + set found_targetlabel_at_boundary 1 + } elseif {[dict get $labelinfo labelfound]} { + set unsearched_label [dict get $labelinfo label] + puts stderr "[a+ cyan]Line $forwardbline_num_global: Found an item that cmd may interpret as a target label because of its location at a boundary $b - but it doesn't seem to be the one we are looking for. Looking for '$label' Found: '[dict get $labelinfo label]'[a]" + puts stderr "[a+ yellow]Warning - if the label '$unsearched_label' on line $forwardbline_num_global isn't meant to be a target - it may be safest to make sure batch script isn't using CALL or GOTO with target :$unsearched_label" + puts stdout "linedata:\n" + #puts stdout "'$payload_from_boundary'" + puts [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + + #dubious value to check call_labels_found - as we didn't run through and find all call labels first! + if {$unsearched_label in [dict keys $call_labels_found]} { + set boundary_target_label_record [list label $unsearched_label line $forwardbline_num_global error found_via_boundary_check_on_a_different_call_label] + dict lappend warning_target_labels_found $unsearched_label $boundary_target_label_record + } else { + set possible_target_label_record [list label $unsearched_label line $forwardbline_num_global] + dict lappend possible_target_labels_found $unsearched_label $possible_target_label_record + } + } else { + set note "" + if {[dict exists $labelinfo note]} { + set note [dict get $labelinfo note] + } + if {$note ne "prefix_fail"} { + puts stdout "no label detected at boundary $b - probably ok. Note from target-label scanner: $note" + } + } + if {$found_targetlabel_at_boundary} { + set target_label_record [list label $label line $forwardbline_num_global error call_offset_bytes] + dict lappend target_labels_found $label $target_label_record + set note "possibly unreliable or dangerous target-label at line $forwardbline_num_global may execute line [expr {$forwardbline_num_global +1}].\n" + append note "Target label not at line start but was found by scanning 512byte chunks from callsite with count resets at any crlf encountered\n" + append note "Adjust spacing between line $callingline_num and $forwardbline_num_global to avoid the 512 boundary - and re-test for other boundary problems" + lappend error_labels [list label $label call_offset_bytes $b callsite [list call ${call}${labelplus} call_linenum $callingline_num] note $note] + puts stdout "[a+ bold red]ERROR: line $forwardbline_num_global target-label [dict get $labelinfo rawlabel] found at boundary and with byte offset from callsite: $b [a]" + puts stdout "[a+ bold red] This target-label appears to fall at or just after the 512byte boundary at byte $b[a] [a+ yellow bold]from callsite.[a]" + puts stdout "[a+ bold yellow]Code may execute at line [expr {$forwardbline_num_global + 1}] (or at next 512Byte boundary in some circumstances)[a]" + puts stdout "[a+ bold yellow]Recommend adjusting spacing between line $callingline_num and $forwardbline_num_global[a]" + puts stdout [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + } + #if found any label - peek at next boundary + if {[dict get $labelinfo labelfound] && $i+1 < [llength $boundary_positions]} { + set next_lineinfolist [$objForwardScan chunkrange_to_lineinfolist $nextb end -show_truncated 1] + set next_lineinfo [lindex $next_lineinfolist 0] + puts "peek next boundary data - line [expr {$forwardbline_num_global + 1}]:" + #if {[dict get $next_lineinfo is_truncated]} { + # puts [dict get $next_lineinfo truncated] + #} else { + # puts [dict get $next_lineinfo payload] + #} + puts [$objForwardScan chunk_boundary_display [dict get $next_lineinfo start] [dict get $next_lineinfo end] 0 -boundaries $nextb -linebase $callposn_lineindex+1 -limit 1] + } + } + } + } + $objForwardScan destroy + + #scan behind for labels at boundaries - using offset from start of file + #we do a backward scan even if a forward label has been found, so that we can warn of duplicate labels. + + set prior_start 0 + set prior_end $callingline_index ;#only scan from file start to call-site + + set pline_begin 0 + set found_backward_label 0 + set p_linenum 0 + for {set pidx $prior_start} {$pidx <= $prior_end} {incr pidx} { + set plineinfo [$objFile lineinfo $pidx] + set pline [dict get $plineinfo payload] + incr p_linenum + set pline_bytes [dict get $plineinfo linelen] ;#includes lf or crlf ending bytes + set pline_start $pline_begin + if {$pline_start != [dict get $plineinfo start]} { + error "checkfile error: line $p_linenum - calculated start $pline_start not equal to stored start [dict get $plineinfo start]" + } + set pline_end [expr {$pline_begin + $pline_bytes -1}] + if {$pline_end != [dict get $plineinfo end]} { + error "checkfile error: line $p_linenum - calculated end $pline_end not equal to stored end [dict get $plineinfo end]" + } + + + set trimpline [string trim $pline] + #todo - process leading part of line before : + #e.g the following are valid (leading # is not part of the examples) + # ====== : label + # also + #%=== == : label + # also + #%= ,,,, ;;; = : label + + #these token delimiters (; , = 0x0B ox0C 0xFF ) + #can also occur after the colon e.g + #: ;label + + #the following is a valid target for @GOTO :#something + #: ;#something + + #It is possible for closing bracket ) to also be invisible if there is no open ( active + #This only seems to work for a single ) at beggining of the line multiple ) even separated by spaces or ; etc seem to stop the target being found. + #The lone unbalanced ) can act like a comment in other contexts - and can appear multiple times, but only if first ) on the line is followed by a delimiter + #Essentially all characters following the first ) are ignored - but if the first is something like )) then cmd tries to interpret that as a command and fails + # e.g + #) ignored + #);)))) ignored + #)) causes error as cmd tries to run "))" as a command. + #This is a reason why *target* labels shouldn't appear in bracketed blocks - as code jumps to a point where ( ) will be unbalanced + + #target labels are literal with regards to % ie not subject to % expansion - but ^ must still be processed + if {[string first : $pline] >= 0} { + #space (and some other chars) allowed between colon and label at target - (but not at callsite) + set labelinfo [batchlib::get_target_label_from_line $pline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set target_label_record [list label $label line $p_linenum] + puts stdout "$labelinfo" + incr found_backward_label + set prior_label_posn_in_line [string first : $pline] + set prior_label_posn [expr {$pline_begin + $prior_label_posn_in_line}] + if {($prior_label_posn % 512) == 0} { + set p_ubound [expr {($prior_label_posn / 512) * 512}] + } else { + set p_ubound [expr {(($prior_label_posn /512) +1) * 512}] + } + set p_lbound [expr {$p_ubound - $labelsize}] + if {($prior_label_posn >= $p_lbound) && ($prior_label_posn <= $p_ubound)} { + dict set target_label_record error linestart_and_overlap + lappend error_labels [list label $label linestart_and_overlap $prior_label_posn callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold red]ERROR: target-label '$trimpline' at line $p_linenum and offset from file start: $prior_label_posn line start: $pline_begin[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $p_ubound[a] [a+ yellow bold]from file start[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 512 -linebase 1 -limit 1] + } else { + dict set target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $p_linenum target-label '$trimpline' before call from line $callingline_num. Target is at offset from file start: $prior_label_posn line start: $pline_begin[a]" + } + dict lappend call_labels_found $label $target_label_record + } + #else - label we weren't searching for - even if at file boundary it should be picked up when actually searched? review + } + set spaninfo [fileline::range_spans_chunk_boundaries $pline_start $pline_end 512] + if {[dict get $spaninfo is_span]} { + #puts stdout "boundary spanning line $p_linenum byte range $pline_start -> $pline_end [a+ bold purple]$spaninfo[a]" + #check boundaries within the line + set boundaries [dict get $spaninfo boundaries] + foreach b $boundaries { + if {$b == 0} { + #skip - beginning of line already handled (review?) + continue + } + #overlap test is just a warning - we have a label-like thing overlapping the boundary + #todo - take account of fact that target label can be ": labelname" - so using just labelsize won't detect all overlaps + #The label could even be at the end of a long line that appears at first to be a comment e.g something like + # : whatever : sneakylabel + # or + #@REM ============================================================================================================================================================ : sneakylabel + + #The fact that it overlaps - means it's probably not being found with lf line-endings - and only the label :whatever should be found with crlf endings + #- but we won't always catch that something's fishy + #review + set overlaptail [string range $pline [expr {$b - $labelsize}] [expr {($b + $labelsize) -1}]] ;#subtracting labelsize gives earliest possible overlap + if {[string match "*:$label *" $overlaptail] } { + lappend warning_labels [list label $label warning label_spanning callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow] WARNING: possible label $label spans boundary $b from start of file" + } + + set pline_tail [string range $pline $b end] + + if {[string first : $pline_tail] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $pline_tail] + set labelfound 0 + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set labelfound 1 + } elseif {[dict get $labelinfo labelfound]} { + puts stdout "Note: detected target label [dict get $labelinfo label] at file offset $b at boundary with no preceeding newline - but it's not the one we're currently scanning for" + } + if {$labelfound} { + set label_found_name [dict get $labelinfo label] + incr found_backward_label + + lappend error_labels [list label $label_found_name file_offset_bytes $b note "label at boundary but no preceeding newline - cmd may interpret as label and execute following line or code at next boundary" callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + + puts stdout "[a+ bold red]ERROR: *possible* label '$label_found_name' at line $p_linenum and offset from file start: $b line start: $pline_begin[a]" + puts stdout "[a+ bold red] This label with no preceeding newline appears to span the 512byte boundary at byte $b[a] [a+ yellow bold]from file start[a]" + puts stdout "[a+ bold red] cmd.exe may find this label - but it probably shouldn't be relied upon[a]" + puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" + + set target_label_record [list label $label_found_name line $p_linenum] + if {$label_found_name in [dict keys $call_labels_found]} { + dict set target_label_record error "called_label_at_file_offset_boundary" + dict lappend target_labels_found $label_found_name $target_label_record + } else { + #review - we need to get better at finding all calls! + dict set target_label_record error "uncalled_label_at_file_offset_boundary" + dict lappend possible_target_labels_found $label_found_name $target_label_record + } + + + set tail_start $b + set tail_end [expr {$b + [string length $pline_tail]}] + set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] + if {[dict get $tail_spaninfo is_span]} { + set tail_boundaries [dict get $tail_spaninfo boundaries] + set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] + if {[llength $extra_tail_boundaries]} { + puts "Line $p_linenum also spans additional boundaries: $extra_tail_boundaries" + set next_boundary [lindex $extra_tail_boundaries 0] + #boundary doesn't reset if no crlf - we are still within the line - so can calc from line beginning + set next_boundary_data [string range $pline [expr {$pline_begin + $next_boundary}] end] + puts "Line $p_linenum data at next boundary: [a+ yellow bold]$next_boundary_data[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 0 -boundaries $next_boundary -linebase 1 -limit 1] + + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } + } else { + if {$pidx+1 < [$objFile linecount]} { + set nextlineinfo [$objFile lineinfo $pidx+1] + set nextpayload [dict get $nextlineinfo payload] + puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } else { + #EOF reached + } + } + } + } + + } + + } + incr pline_begin $pline_bytes + } + + if {$found_forward_label == 0} { + if {[string toupper $label] eq "EOF"} { + #EOF/eof label is special - it doesn't have to exist - but if it does - it probably shouldn't be spanning a boundary + puts stdout "[a+ bold green]OK: label :$label doesn't exist - but it's usually not meant to. callsite: [list call ${call}${labelplus} call_linenum $callingline_num] [a]" + } else { + if {$found_backward_label == 0} { + lappend warning_labels [list label $label warning label_not_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow]WARNING: label :$label not found (in forward or backward scan)[a]" + } + } + } + if {($found_forward_label + $found_backward_label) > 1} { + #puts "target_labels_found: $target_labels_found" + dict for {targetkey targethits} $target_labels_found { + set targetlines [list] + foreach record $targethits { + lappend targetlines [dict get $record line] + } + set remaining [list] + set previous "" ; + foreach lnum [lsort -integer -increasing $targetlines] { + if {$previous eq ""} { + lappend remaining $lnum + } else { + if {$lnum-1 == $previous} { + puts stdout "[a+ green bold]OK[a] - target-label $targetkey appears on immediately adjacent lines $previous and $lnum - assuming it is a boundary-avoidance tactic rather than an inadvertent duplicate" + set remaining [lrange $remaining 0 end-1];#retain latest - we will allow a run of targets on subsequent lines + } + lappend remaining $lnum + } + set previous [lindex $remaining end] + } + if {[llength $remaining] > 1} { + lappend warning_labels [list label $label warning multiple_target_labels_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow]WARNING: label :$label seems to appear multiple times[a]" + } + } + } + } + } + } ;# end switch + incr file_offset $callingline_len ;#including per-line stored line-ending + } + if {[dict size $possible_target_labels_found] > 0} { + #puts stdout "Possibly bogus target-labels: [dict keys $possible_target_labels_found]" + set bogus_summary [list] + dict for {pb pbrecords} $possible_target_labels_found { + if {$pb in [dict keys $call_labels_found]} { + puts stdout "[a+ yellow bold]Warning - target for label $pb was found with a record as being possibly bogus. record: $pbrecords [a]" + puts stdout "[a+ yellow bold]Consider moving this target-label and re-checking[a]" + puts stdout "[a+ yellow bold]It may be a call label line that was found by boundary scanning - which shouldn't really happen[a]" + puts stdout "Call record [dict get $call_labels_found $pb]" + lappend warning_labels [list label $pb warning possibly_bogus_target list_of_target_hits $pbrecords] + } + set blines [list] + foreach rec $pbrecords { + lappend blines [dict get $rec line] + } + lappend bogus_summary [list label $pb found_on_lines $blines] + } + puts stdout "[a+ cyan]Possibly bogus target-labels: $bogus_summary[a]" + puts stdout "These are usually nothing to be concerned about. Some will almost always turn up in a polyglot script that contains batch script." + puts stdout "If some of the label names appear to contain newlines, or are prefixes of or exact matches with legitimate labels - you might consider adjusting the boundary spacing with whitespace or comments to get a different result." + } + set result "" + if {[llength $warning_labels]} { + append result "WARNING:" \n + append result "The following labels had warnings" \n + foreach w $warning_labels { + append result " [a+ bold yellow]$w[a]" \n + } + } + if {[llength $error_labels]} { + append result "ERROR: label location errors found" \n + append result "The following labels appear to span 512 Byte boundaries or occur on boundaries without a preceding newline and are likely to cause batch script errors" \n + append result "For labels spanning boundaries the label is likely to be missed by the batch interpreter" \n + append result "For labels occuring at boundaries but not at the beginning of a line, the label may be interpreted as a label when not expected, and the interpreter may run code on next line or next boundary" \n + append result "Try adding comments and/or spacing between the call site at the call_lineum indicated and the label and then re-test in case there are further boundary collisions" \n + foreach err $error_labels { + append result " [a+ bold red]$err[a]" \n + } + } + if {[dict size $warning_target_labels_found] > 0} { + puts stdout "target-labels with minor warnings: [dict keys $warning_target_labels_found]" + } + append result "call-labels-found: [dict keys $call_labels_found]" \n + append result "target-labels-found: [dict keys $target_labels_found]" \n + if {![llength $warning_labels] && ![llength $error_labels]} { + puts stderr \n + puts stderr "[a+ green bold]OK No warnings or errors considered major enough to return in result.[a]" + } + return $result + } + #specific filepath to just wrap one script at the xxx-pre-launch-suprocess site + #scriptset name to substiture multiple scriptset.xxx files at the default locations - or as specified in scriptset.wrapconf + proc multishell {filepath_or_scriptset args} { + set opts [dict create\ + -askme 1\ + -outputfolder "\uFFFF"\ + -template "\uFFFF"\ + -returnextra 0\ + -force 0\ + ] + #set known_opts [dict keys $defaults] + foreach {k v} $args { + switch -- $k { + -askme - -outputfolder - -template - -returnextra - -force { + dict set opts $k $v + } + default { + error "punk::mix::commandset::multishell error. Unrecognized option '$k'. Known-options: [dict keys $opts]" + } + } + } + + set usage "" + append usage "Use directly with the script file to wrap, or supply the name of a scriptset" \n + append usage "The scriptset name will be used to search for yourname.sh|tcl|ps1 or names as you specify in yourname.wrapconfig if it exists" \n + append usage "If no template is specified in a .wrapconfig and no -template argument is supplied, it will default to punk-multishell.cmd" \n + if {![string length $filepath_or_scriptset]} { + puts stderr "No filepath_or_scriptset specified" + puts stderr $usage + return false + } + # -- --- --- --- --- --- --- --- --- --- --- --- + set opt_askme [dict get $opts -askme] + set opt_template [dict get $opts -template] + set opt_outputfolder [dict get $opts -outputfolder] + set opt_returnextra [dict get $opts -returnextra] + set opt_force [dict get $opts -force] + # -- --- --- --- --- --- --- --- --- --- --- --- + + + set ext [file extension $filepath_or_scriptset] + set startdir [pwd] + + + + #first check if relative or absolute path matches a file + if {[file pathtype $filepath_or_scriptset] eq "absolute"} { + set specified_path $filepath_or_scriptset + } else { + set specified_path [file join $startdir $filepath_or_scriptset] + } + set ext [string trim [file extension $filepath_or_scriptset] .] + set allowed_extensions [list wrapconfig tcl ps1 sh bash pl] + set extension_langs [list tcl tcl ps1 powershell sh sh bash bash pl perl] + #set allowed_extensions [list tcl] + set found_script 0 + if {[file exists $specified_path]} { + set found_script 1 + } else { + foreach e [concat $allowed_extensions [string toupper $allowed_extensions]] { + if {[file exists $filepath_or_scriptset.$e]} { + set found_script 1 + break + } + } + } + + #TODO! - use get_wrapper_folders - multishell should use same available templates as the 'templates' function + set scriptset [file rootname [file tail $specified_path]] + if {$found_script} { + if {[file type $specified_path] eq "file"} { + set specified_root [file dirname $specified_path] + set pathinfo [punk::repo::find_repos [file dirname $specified_path]] + set projectroot [dict get $pathinfo closest] + if {[string length $projectroot]} { + #use the specified files folder - but use the main scriptapps/wrappers folder if specified one has no wrappers subfolder + set scriptroot [file dirname $specified_path] + if {[file exists $scriptroot/wrappers]} { + set customwrapper_folder $scriptroot/wrappers + } else { + set customwrapper_folder $projectroot/src/scriptapps/wrappers + } + } else { + #outside of any project + set scriptroot [file dirname $specified_path] + if {[file exists $scriptroot/wrappers]} { + set customwrapper_folder $scriptroot/wrappers + } else { + #no customwrapper folder available + set customwrapper_folder "" + } + } + } else { + puts stderr "wrap_in_multishell doesn't currently support a directory as the path." + puts stderr $usage + return false + } + } else { + set pathinfo [punk::repo::find_repos $startdir] + set projectroot [dict get $pathinfo closest] + if {[string length $projectroot]} { + if {[llength [file split $filepath_or_scriptset]] > 1} { + puts stderr "filepath_or_scriptset looks like a path - but doesn't seem to point to a file" + puts stderr "Ensure you are within a project and use just the name of the scriptset, or pass in the full correct path or relative path to current directory" + puts stderr $usage + return false + } else { + #we've already ruled out empty string - so must have a single element representing scriptset - possibly with file extension + set scriptroot $projectroot/src/scriptapps + set customwrapper_folder $projectroot/src/scriptapps/wrappers + #check something matches the scriptset.. + set something_found "" + if {[file exists $scriptroot/$scriptset]} { + set found_script 1 + set something_found $scriptroot/$scriptset ;#extensionless file - that's ok too + } else { + foreach e $allowed_extensions { + if {[file exists $scriptroot/$scriptset.$e]} { + set found_script 1 + set something_found $scriptroot/$scriptset.$e + break + } + } + } + if {!$found_script} { + puts stderr "Searched within $scriptroot" + puts stderr "Unable to find a file matching $scriptset or one of the extensions: $allowed_extensions" + puts stderr $usage + return false + } else { + if {[file type $something_found] ne "file"} { + puts stderr "Found '$something_found'" + puts stderr "wrap_in_multishell doesn't currently support a directory as the path." + puts stderr $usage + return false + } + } + } + + } else { + puts stderr "filepath_or_scriptset parameter doesn't seem to refer to a file, and you are not within a directory where projectroot and src/scriptapps/wrappers can be determined" + puts stderr $usage + return false + } + } + #assertion - customwrapper_folder var exists - but might be empty + + + if {[string length $ext]} { + #If there was an explicitly supplied extension - then that file should exist + if {![file exists $scriptroot/$scriptset.$ext]} { + puts stderr "Explicit extension .$ext was supplied - but matching file not found." + puts stderr $usage + return false + } else { + if {$ext eq "wrapconfig"} { + set process_extensions ALLFOUNDORCONFIGURED + } else { + set process_extensions $ext + } + } + } else { + #no explicit extension - process all for scriptset + set process_extensions ALLFOUNDORCONFIGURED + } + #process_extensions - either a single one - or all found or as per .wrapconfig + + if {$opt_template eq "\uFFFF"} { + set templatename punk.multishell.cmd + } else { + set templatename $opt_template + } + set templatename_root [file rootname [file tail $templatename]] + + #determine name of file on disk based on whether templatename is prefixed with vendor. + set templatename_vendor "" + set templatename_fileroot $templatename_root + if {[llength [split $templatename_root .]] > 1} { + set tparts [split $templatename_root .] + set templatename_vendor [lindex $tparts 0] + set templatename_fileroot [join [lrange $tparts 1 end] .] + } + + #assertion: templatename_fileroot is the base of the filname without the vendor and first dot + + set template_base_dict [punk::mix::base::lib::get_template_basefolders] + set tpldirs [list] + dict for {tdir tsourceinfo} $template_base_dict { + set vendor [dict get $tsourceinfo vendor] + if {[file exists $tdir/utility/scriptappwrappers/$templatename]} { + lappend tpldirs $tdir + } elseif {[file exists $tdir/utility/scriptappwrappers/${templatename_fileroot}[file extension $templatename]]} { + lappend tpldirs $tdir + } + } + + if {[string length $customwrapper_folder] && [file exists [file join $customwrapper_folder $templatename] ]} { + set wrapper_template [file join $customwrapper_folder $templatename] + } else { + if {![llength $tpldirs]} { + set msg "No template named '$templatename' found in src/scriptapps/wrappers or in template dirs from packages" + append msg \n "Searched [dict size $template_base_dict] template dirs" + error $msg + } + + #last pkg with templates cap which was loaded has highest precedence + set wrapper_template "" + foreach tdir [lreverse $tpldirs] { + set ftest1 [file join $tdir utility scriptappwrappers $templatename] + set ftest2 [file join $tdir utility scriptappwrappers $templatename_fileroot[file extension $templatename]] + if {[file exists $ftest1]} { + set wrapper_template $ftest1 + break + } elseif {[file exists $ftest2]} { + set wrapper_template $ftest2 + break + } + } + } + + if {$wrapper_template eq "" || ![file exists $wrapper_template]} { + error "wrap_in_multishell: unable to find multishell template $templatename in template folders [concat $tpldirs $customwrapper_folder]" + } + + + if {$opt_outputfolder eq "\uFFFF"} { + #outputfolder not explicitly specified by caller + if {[string length $projectroot]} { + set output_folder [file join $projectroot/bin] + } else { + set output_folder $startdir + } + } else { + if {[file pathtype $opt_outputfolder] eq "relative"} { + if {[string length $projectroot]} { + set output_folder [file join $projectroot $opt_outputfolder] + } else { + set output_folder [file join $startdir $opt_outputfolder] + } + } else { + set output_folder $opt_outputfolder + } + } + if {![file isdirectory $output_folder]} { + error "wrap_in_multishell: output folder '$output_folder' not found. Please ensure target directory exists" + } + + + #todo + #output_file extension may also depend on the template being used.. and/or the .wrapconfig + if {$::tcl_platform(platform) eq "windows"} { + set output_extension cmd + } else { + set output_extension sh + } + set output_file [file join $output_folder $scriptset.$output_extension] + if {[file exists $output_file]} { + set fdexisting [open $output_file r] + fconfigure $fdexisting -translation binary + set existing_file_data [read $fdexisting] + close $fdexisting + set objFile_existing [fileline::textinfo new $existing_file_data] + puts stdout "wrap_in_multishell: target file $output_file already exists. File size: [$objFile_existing chunklen] Line count: [$objFile_existing linecount]" + if {!$opt_force} { + if {$opt_askme} { + set answer [util::askuser "Do you want to overwrite $output_file? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "aborting due to user response '$answer' (required Y or y to proceed) use -force 1 or -askme 0 to avoid prompts." + $objFile_existing destroy + error "aborting.." + } + } else { + $objFile_existing destroy + error "aborting.." + } + } else { + puts stdout "overwriting $output_file because -force = $opt_force" + } + } + + + set fdt [open $wrapper_template r] + fconfigure $fdt -translation binary + set template_data [read $fdt] + close $fdt + puts stdout "Read [string length $template_data] bytes of template data.." + set template_lines [split $template_data \n] + puts stdout "Displaying first 3 lines of template between dashed lines..." + puts stdout "-----------------------------------------------" + foreach ln [lrange $template_lines 0 3] { + puts stdout $ln + } + puts stdout "-----------------------------------------------\n" + #foreach ln $template_lines { + #} + + set list_input_files [list] + if {$process_extensions eq "ALLFOUNDORCONFIGURED"} { + #todo - look for .wrapconfig or all extensions for the scriptset + puts stderr "Sorry - only single input file supported. Supply a file extension or use a .wrapconfig with a single input file for now - implementation incomplete" + return false + } else { + lappend list_input_files $scriptroot/$scriptset.$ext + } + + #todo - split template at each etc marker and build a dict of parts + + + #hack - process one input + set filepath [lindex $list_input_files 0] + + set fdscript [open $filepath r] + fconfigure $fdscript -translation binary + set script_data [read $fdscript] + close $fdscript + puts stdout "Read [string length $script_data] bytes of template data.." + set script_lines [split $script_data \n] + puts stdout "Displaying first 3 lines of your script between dashed lines..." + puts stdout "-----------------------------------------------" + foreach ln [lrange $script_lines 0 3] { + puts stdout $ln + } + puts stdout "-----------------------------------------------\n" + puts stdout "Target for above script data is '$output_file'" + set lang [dict get $extension_langs [string tolower $ext]] + puts stdout "Language of script being wrapped is $lang" + if {$opt_askme} { + set answer [util::askuser "Does this look correct? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y or y to proceed) use -askme 0 to avoid prompts." + return + } + } + + set start_idx 0 + set end_idx 0 + set line_idx 0 + set existing_payload [list] + foreach ln $template_lines { + + if {[string match "#<$lang-pre-launch-subprocess>*" $ln]} { + set start_idx $line_idx + } elseif {[string match "#*" $ln]} { + set end_idx $line_idx + break + } elseif {$start_idx > 0} { + if {$end_idx > 0} { + lappend existing_payload [string trim $ln] + } + } else { + + } + incr line_idx + } + if {($start_idx == 0) || ($end_idx == 0)} { + error "wrap_in_multishell was unable to find payload area in template marked with #<$lang-pre-launch-subprocess> and # on separate lines" + } + set existing_string [join $existing_payload \n] + if {[string length [string trim $existing_string]]} { + puts stdout "EXISTING <$lang-pre-launch-subprocess> PAYLOAD!!" + puts stdout "-----------------------------------------------\n" + puts stdout $existing_string + puts stdout "-----------------------------------------------\n" + error "wrap_in_multishell found existing payload for language $lang ... aborting." + #todo - allow overwrite only in files outside of punkshell distribution? + if 0 { + puts stderr "Found existing $lang payload.. overwrite?" + if {$opt_askme} { + set answer [util::askuser "Are you sure you want to replace the $lang payload shown above? Y|N"] + if {[string tolower $answer] ne "y"} { + puts stderr "mix new aborting due to user response '$answer' (required Y|y to proceed) use -askme 0 to avoid prompts." + return + } + } + } + } + + set tpl_head_lines [lrange $template_lines 0 $start_idx] ;#include tag line + set tpl_tail_lines [lrange $template_lines $end_idx end] + set newscript [join $tpl_head_lines \n]\n[join $script_lines \n]\n[join $tpl_tail_lines \n] + puts stdout "New script is [string length $newscript] bytes" + puts stdout $newscript + set fdtarget [open $output_file w] + fconfigure $fdtarget -translation binary + puts -nonewline $fdtarget $newscript + close $fdtarget + puts stdout "Wrote script file at $output_file" + set check_result [checkfile $output_file] + set with_errors "" + set with_warnings "" + set call_labels [list] + set target_labels [list] + set errorlist [list] + set warninglist [list] + if {$check_result ne ""} { + puts stdout $check_result + set check_lines [split $check_result \n] + foreach cl $check_lines { + set trimcl [string trim $cl] + if {[string match "ERROR:*" $trimcl]} { + set with_errors "[a+ bold red]with errors[a]" + lappend errorlist $trimcl + } + if {[string match "WARNING:*" $trimcl]} { + set with_warnings "[a+ bold yellow] with warnings[a]" + lappend errorlist $trimcl + } + if {[string match "call-labels-found:*" $trimcl]} { + set call_labels [string trim [string range $trimcl [string length "call-labels-found:"] end]] + } + if {[string match "target-labels-found:*" $trimcl]} { + set target_labels [string trim [string range $trimcl [string length "target-labels-found:"] end]] + } + } + } else { + puts stderr "Expected output from checkfile - but got none" + } + #even though chmod might exist on windows - we will leave permissions alone + if {$::tcl_platform(platform) ne "windows"} { + catch {exec chmod +x $output_file} + } + puts stdout "-done- $with_errors $with_warnings" + if {$opt_returnextra} { + set result [list filename $output_file batch_call_labels $call_labels batch_target_labels $target_labels] + if {[llength $warninglist]} { + dict set result warnings $warninglist + } + if {[llength $errorlist]} { + dict set result errors $errorlist + } + } else { + set result [list filename $output_file] + } + + return $result + } + + namespace eval lib { + + + proc get_wrapper_folders {args} { + set argd [punk::args::get_dict { + #*** !doctools + #[call [fun get_wrapper_folders] [arg args] ] + #[para] Return list of dicts representing wrapper folders. keys: basefolder sourceinfo + #[para] Arguments: + # [list_begin arguments] + # [arg_def string args] name-value pairs -scriptpath + # [list_end] + *proc -name get_wrapper_folders + *opts -anyopts 0 + -scriptpath -default "" + *values -minvalues 0 -maxvalues 0 + } $args] + + # -- --- --- --- --- --- --- --- --- + set opt_scriptpath [dict get $argd opts -scriptpath] + # -- --- --- --- --- --- --- --- --- + + set wrapper_template_bases [list] + set tbasedict [punk::mix::base::lib::get_template_basefolders $opt_scriptpath] + dict for {tbase folderinfo} $tbasedict { + set wrapf [file join $tbase utility/scriptappwrappers] + if {[file isdirectory $wrapf]} { + lappend wrapper_template_bases [list basefolder $wrapf sourceinfo $folderinfo] + } + } + return $wrapper_template_bases + } + + + + proc _scriptapp_tag_from_line {line} { + set result [list istag 0 raw ""] ;#default assumption. All + #---- + set startc [string first "#" $line] ;#tags must be commented + #todo - review. next line is valid - note # doesn't have to be the only one before + # @REM # etc < blah # etc + #--- + #fix - we should use a regexp on at least and only catch tagname without whitespace + regexp {(\s*).*} $line _ln indent ;#will match on empty line, whitespace only line - or anything really. + set indent [string map [list \t " "] $indent] ;#opinionated I guess - but need to normalize to something. The spec is that spaces should be used anyway. + dict set result indent [string length $indent] + set starttag [string first "<" $line] + set pretag [string range $line $startc $starttag-1] + if {[string match "*>*" $pretag]} { + return [list istag 0 raw $line reason pretag_contents] + } + set closetag [string first ">" $line] + set inelement [string range $line $starttag+1 $closetag-1] + if {[string match "*<*" $inelement]} { + return [list istag 0 raw $line reason tag_malformed_angles] + } + set elementchars [split $inelement ""] + set numslashes [llength [lsearch -all $elementchars "/"]] + if {$numslashes == 0} { + dict set result type "open" + } elseif {$numslashes == 1} { + if {[lindex $elementchars 0] eq "/"} { + dict set result type "close" + } elseif {[lindex $elementchars end] eq "/"} { + dict set result type "openclose" + } else { + return [list istag 0 raw $line reason tag_malformed_slashes] + } + } else { + return [list istag 0 raw $line reason tag_malformed_extraslashes] + } + if {[dict get $result type] eq "open"} { + dict set result name $inelement + } elseif {[dict get $result type] eq "close"} { + dict set result name [string range $inelement 1 end] + } else { + dict set result name [string range $inelement 0 end-1] + } + dict set result istag 1 + dict set result raw $line + return $result + } + + #get all \n#\n ...\n# data - where number of intervening newlines is at least one (and whitespace and/or other data can precede #) + #we don't verify 'something' against known tags - as custom templates can have own tags + #An openclose tag # is used to substitute a specific line in its entirety - but the tag *must* remain in the line + # + #e.g for the line: + # @set "nextshell=pwsh" & :: # + #The .wrapconfig might contain + # tag line {@set "nextshell=tclsh" & :: @} + # + proc scriptapp_wrapper_get_tags {wrapperdata} { + set wrapperdata [string map [list \r\n \n] $wrapperdata] + set lines [split $wrapperdata \n] + #set tags_in_data [dict create];#active tags - list of lines accumulating. supports nested tags + set status 0 + set tags [dict create] + set errors [list] + set errortags [dict create] ;#mark names invalid on first error so that more than 2 tags can't obscure config problem + set linenum 1 ;#editors and other utils use 1-based indexing when referencing files - we should too to avoid confusion, despite it being less natural for lindex operations on the result. + foreach ln $lines { + set lntrim [string trim $ln] + if {![string length $lntrim]} { + incr linenum + continue + } + if {[string match "*#*<*>*" $lntrim] || [string match "*:*<*>*" $lntrim]} { + set taginfo [_scriptapp_tag_from_line $ln] ;#use untrimmed line - to get indent + if {[dict get $taginfo istag]} { + set nm [dict get $taginfo name] + if {[dict exists $errortags $nm]} { + #tag is already in error condition - + } else { + set tp [dict get $taginfo type] ;# type singular - related to just one line + #set raw [dict get $taginfo raw] #equivalent to $ln + if {[dict exists $tags $nm]} { + #already seen tag name + #tags dict has types key *plural* - need to track whether we have type open and type close (or openclose for self-closing tags) + if {[dict get $tags $nm types] ne "open"} { + lappend errors "line: $linenum tag $nm encountered type $tp after existing type [dict get $tags $nm types]" + dict incr errortags $nm + } else { + #we already have open - expect only close + if {$tp ne "close"} { + lappend errors "line: $linenum tag $nm encountered type $tp after existing type [dict get $tags $nm types]" + dict incr errortags $nm + } else { + #close after open + dict set tags $nm types [list open close] + dict set tags $nm end $linenum + set taglines [dict get $tags $nm taglines] + if {[llength $taglines] != 1} { + error "Unexpected result when closing tag $nm. Existing taglines length not 1." + } + dict set tags $nm taglines [concat $taglines $ln] + } + } + } else { + #first seen of tag name + switch -- $tp { + close { + lappend errors "line: $linenum tag $nm encountered type $p close first" + dict incr errortags $nm + } + open { + dict set tags $nm types open + dict set tags $nm indent [dict get $taginfo indent] + dict set tags $nm start $linenum + dict set tags $nm taglines [list $ln] ;#first entry - another will be added on encountering matching closing tag + } + openclose { + dict set tags $nm types openclose + dict set tags $nm indent [dict get $taginfo indent] + dict set tags $nm start $linenum + dict set tags $nm end $linenum + dict set tags $nm taglines [list $ln] ;#single entry is final result for self-closing tag + } + } + } + } + } else { + #looks like it should be a tag.. but failed to even parse for some reason.. just add to errorlist + lappend errors "line: $linenum tag parse failure reason: [dict get $taginfo reason] raw line: [dict get $taginfo raw]" + } + } + #whether the line is tag or not append to any tags_in_data + #foreach t [dict keys $tags_in_data] { + # dict lappend tags_in_data $t $ln ;#accumulate raw lines - written to the tag entry in tags only on encountering a closing tag, then removed from tags_in_data + #} + incr linenum + } + #assert [expr {$linenum -1 == [llength $lines]}] + if {[llength $errors]} { + set status 0 + } else { + set status 1 + } + if {$linenum == 0} { + + } + return [dict create ok $status linecount [llength $lines] data $tags errors $errors] + } + + + } + + namespace eval batchlib { + # + #see also: https://www.dostips.com/forum/viewtopic.php?t=3803 'Rules for label names vs GOTO and CALL + # review - we may need different get_callsite_label functions? + + proc get_callsite_label {labelstart} { + #labelstart is the character immediately following the colon (which is optional at callsite) - a label such as ::label doesn't seem valid at call or target sites + #e.g for @goto %= possible comment=% :mylabe%%l etc + #we would expect to be passed only "mylabe%%1 etc" + #It is up to the caller to determine where a callsite label begins. + #note that: + #@REM ----- + #@goto ^ + #:label + #@REM----- + # is a valid callsite - but doesn't appear to be found by the label scanner as it's own target label even though :label is on it's own line from non-batch perspective + # so the caller will have to do some batch-style line processing to find all call sites + #Also, for the following 2 lines + #@REM ^ + #:label + # the label will be found - yet if the :label was a command such as @GOTO - it would not be run as a callsite + + + #a quick'n'dirty fix for some ways various escapes are handled within labels at callsite. + #There seem to be very different rules for labels at target site - presumably because they are not part of a command + # Mostly it seems target labels are more literal with regards to % chars - but ^ are processed the same way at target label + #some rules.. + #callsite labels can't have space between : and label - but target labels can + #label terminated by =,: even if prefixed by ^ and even if in squotes or dquotes + #squotes and dquotes otherwise pass through as part of label + #may resolve variables within the label - but characters from variable value can terminate. + #as we don't have access to the variable values - we should normalize %varname% to empty string at callsite - but perhaps emit warning somewhere + # The target labels don't seem to + #a single % resolves to empty - depending. (starts invar processing - and decides if it was a var depending on whether it was closed?) + #sequences of % don't begin a var - number of % in labelname = number of %s divided by 2 and rounded down. ie 1->0 2->1 3-> 1 4->2 5->2 6->3 etc + #spaces in % wrapped var names don't terminate label + #spaces aren't escaped by ^ or quoting + #sequences of ^ seem to follow same counting rule as % + #e.g @goto :la%path%bel where path begins with C:\Program Files.. becomes label :laC + + if {[string index $labelstart 0] in [list : " " \t = {;}]} { + #return everything as tail - nothing was consumed + return [list labelfound 0 note "invalid first character for callsite label" tail $labelstart] + } + + #The due to whitespace and most chars except : and % being alowed inside vars - it seems the best first step + # -------------- start % handling % + set inputchars [split $labelstart ""] + set percentrun 0 ;#0|1 because we use invar-toggling rather than running total of number of percents in a sequence + set invar 0 + set labelout "" + set varsfound [list] + set varterminals [list :] + set labelterminals [list + , {;} = " " \t] + set varname "" + set caretseq 0 + set inputconsumed 0 + foreach c $inputchars { + if {!$invar} { + switch -- $c { + "%" { + set caretseq 0 + set lookahead [lrange $inputchars $inputconsumed+1 end] + if {"%" in $lookahead} { + set invar 1 + incr percentrun + } else { + incr percentrun + } + } + "^" { + if {$caretseq} { + set caretseq 0 + append labelout "^" ;#for every pair encountered in direct sequence - second gets included in label + } else { + set caretseq 1 + } + } + default { + set caretseq 0 + if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + #subst %i with value - here we have no way of getting that - so use blank - user will get warning that target not found + set percentrun 0 + } else { + append labelout [string repeat % [expr {$percentrun / 2}]] + set percentrun 0 + if {$c in $labelterminals} { + break + } + append labelout $c + } + } + } + } else { + #in var - don't do anything with carets(?) + switch -- $c { + % { + if {$percentrun == 1} { + #double percent - rather than just an empty var - emit one % + append labelout % + set invar 0 + set percentrun 0 + } else { + #presume percentrun is 0 + set invar 0 + lappend varsfound $varname; set varname "" + } + } + : { + #$varterminals + set invar 0 + lappend varsfound $varname; set varname "" + } + default { + if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + #review - seems to terminate var - and substitute? + #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + set invar 0 + append varname $c + } else { + append varname $c + } + set percentrun 0 + } + } + + + #if {$c eq "%" && $percentrun == 1} { + # #double percent - rather than just an empty var - emit one % + # append labelout % + # set invar 0 + # set percentrun 0 + #} elseif {$c eq "%"} { + # #presume percentrun is 0 + # set invar 0 + # lappend varsfound $varname; set varname "" + #} elseif {$c in $varterminals} { + # set invar 0 + # lappend varsfound $varname; set varname "" + #} else { + # if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + # #review - seems to terminate var - and substitute? + # #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + # set invar 0 + # append varname $c + # } else { + # append varname $c + # } + # set percentrun 0 + #} + } + incr inputconsumed + } + # -------------- end % handling % + set tail [string range $labelstart $inputconsumed end] + #caret -- etc + if {$labelout eq ""} { + set resultdict [dict create labelfound 0] + if {[llength $varsfound]} { + dict set resultdict vars $varsfound + dict set resultdict note "empty label but vars exist - may be legit" + } else { + dict set resultdict note "empty label - no vars" + } + dict set resultdict tail $tail + return $resultdict + } + + return [list labelfound 1 label $labelout tail $tail] + } + proc get_target_label_from_line {labelline} { + #scan a whole line - or a 'line' starting at some chunk boundary we found for a label + #caller should resolve any trailing caret and subsequent line and include them in the call + #note that we may be scanning all sorts of things in a polyglot file - but we're interested in seeing if cmd.exe might interpret it as a label + #target labels don't have %var% processing - they will be literal + set firstcolon [string first : $labelline] + if {$firstcolon == -1} { + return [list labelfound 0 note "no_colon"] + } + set prefixpart [string range $labelline 0 $firstcolon-1] + set targetpart [string range $labelline $firstcolon+1 end] + + set prefixok 1;#default assumption + set invisible_prefix_chars [list {;} , = " " \t] + set prefixchars [split $prefixpart ""] + # % and ^ in the prefix - whether doubled etc or not - will stop label being found + #ANY first char seems to be allowed in prefixpart (it won't be colon, because we already split on that) + #perhaps this is done by cmd.exe to reduce off-by-one errors?? weird... + # but it does allow labels to be found in certain # tcl/bashsh comment lines, which could be both dangerous and ...useful. + #start prefix check at char 1 instead of 0 + foreach pchar [lrange $prefixchars 1 end] { + if {$pchar ni $invisible_prefix_chars} { + set prefixok 0 + break + } + } + if {!$prefixok} { + return [list labelfound 0 note "prefix_fail"] + } + + #no problems before colon - now see if targetpart can be interpreted as a label + #we again have some potential invisible chars before label begins. + set charindex [expr {$firstcolon +1}] ;#track position so we can return index of where we believe label begins + set targetchars [split $targetpart ""] + set inlabel 0 + set labelposn -1 + # --- + set inlabel_terminals [list : + " " \t \r \n] ;# , ; = don't seem to terminate a target label, but do terminate a calling label + # + and whitespace terminate caller and target + # --- + # consider: + #@goto :14^ + # :14^ + #caller is searching for label "14" but won't match - presumably target scanner has escaped the trailing space + set label "" + set rawlabel "" + set caretseq 0 ;# 0|1 + foreach tchar $targetchars { + if {$tchar in [list + :]} { + break + } + if {!$inlabel} { + if {$tchar ni $invisible_prefix_chars} { + #beginning of target label + set labelposn $charindex + set inlabel 1 + append rawlabel $tchar + if {$tchar eq "^"} { + set caretseq 1 + } else { + append label $tchar + } + } + } else { + if {$tchar in $inlabel_terminals} { + #caret stops them from terminating + if {$caretseq} { + set caretseq 0 + append label $tchar + append rawlabel $tchar + } else { + break + } + } else { + append rawlabel $tchar + if {$tchar eq "^"} { + if {$caretseq} { + set caretseq 0 + append label "^" ;#for every pair encountered in direct sequence - second gets included in label + } else { + set caretseq 1 + } + } else { + set caretseq 0 + append label $tchar ;#for target labels - all including %var% is directly part of the label target + } + } + } + incr charindex + } + if {$labelposn == -1} { + return [list labelfound 0 note "no_label_found_after_colon"] + } + + #return rawlabel so we can see it as it appears in the data - as opposed to how it is interpreted as a label by cmd.exe + return [list labelfound 1 label $label rawlabel $rawlabel] + } + } + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::commandset::scriptwrap [namespace eval punk::mix::commandset::scriptwrap { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates-0.1.0.tm new file mode 100644 index 00000000..dab5312f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates-0.1.0.tm @@ -0,0 +1,94 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::templates 0.1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::cap + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::templates { + variable pkg punk::mix::templates + variable cap_provider + + namespace eval capsystem { + if {[info commands capprovider.registration] eq ""} { + punk::cap::class::interface_capprovider.registration create capprovider.registration + oo::objdefine capprovider.registration { + method get_declarations {} { + set decls [list] + lappend decls [list punk.templates {path templates pathtype adhoc vendor _project}] ;#todo - split out to a different provider package? + + lappend decls [list punk.templates {path templates pathtype module vendor punk}] + #only punk::templates is allowed to register a _multivendor path - review + #other punk.template providers should use module, absolute, currentproject and shellproject pathtypes only + lappend decls [list punk.templates {path src/decktemplates pathtype currentproject_multivendor vendor punk}] + lappend decls [list punk.templates {path decktemplates pathtype shellproject_multivendor vendor punk}] + + + #we need a way to ensure we don't pull updates from a remote repo into a local project that is actually the same project ? review! + #need flags as to whether/how provider allows template updates that are out of sync with the provider pkg version + #perhaps a separate .txt file (alongside buildversion and description txt files) that has some package require statements (we can't put them in the template itself as the filled template may have nothing to do with the punk.templates provider) + lappend decls [list punk.templates {path src/decktemplates/vendor/punk pathtype currentproject vendor punk allowupdates 0 repo "https://www.gitea1.intx.com.au/jn/punkshell" reposubdir "src/decktemplates/vendor/punk"}] + lappend decls [list punk.isbogus {provider punk::mix::templates something blah}] ;#some capability for which there is no handler to validate - therefore no warning will result. + #review - we should report unhandled caps somewhere, or provide a mechanism to detect/report. + #we don't want to warn at the time this provider is loaded - as handler may legitimately be loaded later. + return $decls + } + } + } + } + + if {[info commands provider] eq ""} { + punk::cap::class::interface_capprovider.provider create provider punk::mix::templates + oo::objdefine provider { + method register {{capabilityname_glob *}} { + #puts registering punk::mix::templates $capabilityname + next $capabilityname_glob + } + method capabilities {} { + next + } + } + } + + # -- --- + #provider api + # -- --- + #none - declarations only + #todo - template folder install/update/status methods? +} + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::templates [namespace eval punk::mix::templates { + variable version + set version 0.1.0 +}] +return \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layout_refs/project@vendor+punk+project-0.1.ref b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layout_refs/project@vendor+punk+project-0.1.ref new file mode 100644 index 00000000..e69de29b diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl new file mode 100644 index 00000000..734ccb87 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/build.tcl @@ -0,0 +1,6 @@ +#!/bin/sh +# -*- tcl -*- \ +# 'build.tcl' name as required by kettle +# Can be run directly - but also using `deck Kettle ...` or `deck KettleShell ...`\ +exec ./kettle -f "$0" "${1+$@}" +kettle doc diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl new file mode 100644 index 00000000..20b0c29f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/layouts/project/src/make.tcl @@ -0,0 +1,1092 @@ +# tcl +# +#make any tclkits and modules in src and place them and associated data files/scripts in the parent folder of src. +#e.g in 'bin' and 'modules' folders at same level as 'src' folder. + +set hashline "# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###" +puts $hashline +puts " punkshell make script " +puts $hashline\n +namespace eval ::punkmake { + variable scriptfolder [file normalize [file dirname [info script]]] + variable foldername [file tail $scriptfolder] + variable pkg_requirements [list]; variable pkg_missing [list];variable pkg_loaded [list] + variable non_help_flags [list -k] + variable help_flags [list -help --help /?] + variable known_commands [list project get-project-info shell vendor bootsupport] +} +if {"::try" ni [info commands ::try]} { + puts stderr "Tcl interpreter possibly too old - 'try' command not found - aborting" + exit 1 +} + +#------------------------------------------------------------------------------ +#Module loading from /src/bootsupport or src/*.vfs if script is within a .vfs folder +#------------------------------------------------------------------------------ +#If the there is a folder under the current directory /src/bootsupport/modules which contains .tm files +# - then it will attempt to preference these modules +# This allows a source update via 'fossil update' 'git pull' etc to pull in a minimal set of support modules for the make script +# and load these in preference to ones that may have been in the interp's tcl::tm::list or auto_path due to environment variables +set startdir [pwd] +if {[file exists [file join $startdir src bootsupport]]} { + set bootsupport_mod [file join $startdir src bootsupport modules] + set bootsupport_lib [file join $startdir src bootsupport lib] +} else { + set bootsupport_mod [file join $startdir bootsupport modules] + set bootsupport_lib [file join $startdir bootsupport lib] +} +if {[file exists $bootsupport_mod] || [file exists $bootsupport_lib]} { + + set original_tm_list [tcl::tm::list] + tcl::tm::remove {*}$original_tm_list + set original_auto_path $::auto_path + set ::auto_path [list $bootsupport_lib] + + set support_modules [glob -nocomplain -dir $bootsupport_mod -type f -tail *.tm] + set tcl_core_packages [list tcl::zlib zlib tcl::oo TclOO tcl::tommath tcl::zipfs Tcl Tk] ;#packages we + if {[llength $support_modules] || [llength [glob -nocomplain -dir $bootsupport_lib -tail *]]} { + #only forget all *unloaded* package names + foreach pkg [package names] { + if {$pkg in $tcl_core_packages} { + continue + } + if {![llength [package versions $pkg]]} { + #puts stderr "Got no versions for pkg $pkg" + continue + } + if {![string length [package provide $pkg]]} { + #no returned version indicates it wasn't loaded - so we can forget its index + package forget $pkg + } + } + tcl::tm::add $bootsupport_mod + } + + + if {[file exists [pwd]/modules]} { + tcl::tm::add [pwd]/modules + } + + #package require Thread + # - the full repl requires Threading and punk,shellfilter,shellrun to call and display properly. + + + # tm list already indexed - need 'package forget' to find modules based on current tcl::tm::list + #These are strong dependencies + package forget punk::mix + package require punk::mix + package forget punk::repo + package require punk::repo + package forget punkcheck + package require punkcheck + + + + #restore module paths and auto_path in addition to the bootsupport ones + set tm_list_now [tcl::tm::list] + foreach p $original_tm_list { + if {$p ni $tm_list_now} { + tcl::tm::add $p + } + } + set ::auto_path [list $bootsupport_lib {*}$original_auto_path] + #------------------------------------------------------------------------------ +} + +# ** *** *** *** *** *** *** *** *** *** *** *** +#*temporarily* hijack package command +# ** *** *** *** *** *** *** *** *** *** *** *** +try { + rename ::package ::punkmake::package_temp_aside + proc ::package {args} { + if {[lindex $args 0] eq "require"} { + lappend ::punkmake::pkg_requirements [lindex $args 1] + } + } + package require punk::mix + package require punk::repo +} finally { + catch {rename ::package ""} + catch {rename ::punkmake::package_temp_aside ::package} +} +# ** *** *** *** *** *** *** *** *** *** *** *** +foreach pkg $::punkmake::pkg_requirements { + if {[catch {package require $pkg} errM]} { + puts stderr "missing pkg: $pkg" + lappend ::punkmake::pkg_missing $pkg + } else { + lappend ::punkmake::pkg_loaded $pkg + } +} + + + + + +proc punkmake_gethelp {args} { + set scriptname [file tail [info script]] + append h "Usage:" \n + append h "" \n + append h " $scriptname -help or $scriptname --help or $scriptname /? or just $scriptname" \n + append h " - This help." \n \n + append h " $scriptname project ?-k?" \n + append h " - this is the literal word project - and confirms you want to run the project build" \n + append h " - the optional -k flag will terminate processes running as the executable being built (if applicable)" \n \n + append h " $scriptname bootsupport" \n + append h " - update the src/bootsupport modules as well as the mixtemplates/layouts//src/bootsupport modules if the folder exists" \n \n + append h " $scriptname vendor" \n + append h " - update the src/vendormodules based on src/vendormodules/include_modules.config" \n \n + append h " $scriptname get-project-info" \n + append h " - show the name and base folder of the project to be built" \n + append h "" \n + if {[llength $::punkmake::pkg_missing]} { + append h "* ** NOTE ** ***" \n + append h " punkmake has detected that the following packages could not be loaded:" \n + append h " " [join $::punkmake::pkg_missing "\n "] \n + append h "* ** *** *** ***" \n + append h " These packages are required for punk make to function" \n \n + append h "* ** *** *** ***" \n\n + append h "Successfully Loaded packages:" \n + append h " " [join $::punkmake::pkg_loaded "\n "] \n + } + return $h +} +set scriptargs $::argv +set do_help 0 +if {![llength $scriptargs]} { + set do_help 1 +} else { + foreach h $::punkmake::help_flags { + if {[lsearch $scriptargs $h] >= 0} { + set do_help 1 + break + } + } +} +set commands_found [list] +foreach a $scriptargs { + if {![string match -* $a]} { + lappend commands_found $a + } else { + if {$a ni $::punkmake::non_help_flags} { + set do_help 1 + } + } +} +if {[llength $commands_found] != 1 } { + set do_help 1 +} elseif {[lindex $commands_found 0] ni $::punkmake::known_commands} { + puts stderr "Unknown command: [lindex $commands_found 0]\n\n" + set do_help 1 +} +if {$do_help} { + puts stderr [punkmake_gethelp] + exit 0 +} + +set ::punkmake::command [lindex $commands_found 0] + + +if {[lsearch $::argv -k] >= 0} { + set forcekill 1 +} else { + set forcekill 0 +} +#puts stdout "::argv $::argv" +# ---------------------------------------- + +set scriptfolder $::punkmake::scriptfolder + + + +#first look for a project root (something under fossil or git revision control AND matches punk project folder structure) +#If that fails - just look for a 'project shaped folder' ie meets minimum requirements of /src /src/lib /src/modules /lib /modules +if {![string length [set projectroot [punk::repo::find_project $scriptfolder]]]} { + if {![string length [set projectroot [punk::repo::find_candidate $scriptfolder]]]} { + puts stderr "punkmake script unable to determine an approprite project root at or above the path '$scriptfolder' ensure the make script is within a project folder structure" + puts stderr " -aborted- " + exit 2 + #todo? + #ask user for a project name and create basic structure? + #call punk::mix::cli::new $projectname on parent folder? + } else { + puts stderr "WARNING punkmake script operating in project space that is not under version control" + } +} else { + +} + +set sourcefolder $projectroot/src + +if {$::punkmake::command eq "get-project-info"} { + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- -- get-project-info -- -" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + puts stdout "- projectroot : $projectroot" + if {[punk::repo::find_fossil $scriptfolder] eq $projectroot} { + set vc "fossil" + set rev [punk::repo::fossil_revision $scriptfolder] + set rem [punk::repo::fossil_remote $scriptfolder] + } elseif {[punk::repo::find_git $scriptfolder] eq $projectroot} { + set vc "git" + set rev [punk::repo::git_revision $scriptfolder] + set rem [punk::repo::git_remote $scriptfolder] + } else { + set vc " - none found -" + set rev "n/a" + set remotes "n/a" + } + puts stdout "- version control : $vc" + puts stdout "- revision : $rev" + puts stdout "- remote : $rem" + puts stdout "- -- --- --- --- --- --- --- --- --- ---" + + exit 0 +} + +if {$::punkmake::command eq "shell"} { + package require punk + package require punk::repl + puts stderr "make shell not fully implemented - dropping into ordinary punk shell" + repl::start stdin + + exit 1 +} + +if {$::punkmake::command eq "vendor"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + #puts "-- [tcl::tm::list] --" + puts stdout "Updating vendor modules" + proc vendor_localupdate {projectroot} { + set local_modules [list] + set git_modules [list] + set fossil_modules [list] + #todo vendor/lib ? + set vendor_config $projectroot/src/vendormodules/include_modules.config + if {[file exists $vendor_config]} { + set targetroot $projectroot/src/vendormodules/modules + source $vendor_config ;#populate $local_modules $git_modules $fossil_modules with project-specific list + if {![llength $local_modules]} { + puts stderr "No local vendor modules configured for updating (config file: $vendor_config)" + } else { + if {[catch { + #---------- + set vendor_installer [punkcheck::installtrack new make.tcl $projectroot/src/vendormodules/.punkcheck] + $vendor_installer set_source_target $projectroot $projectroot/src/vendormodules + set installation_event [$vendor_installer start_event {-make_step vendor}] + #---------- + } errM]} { + puts stderr "Unable to use punkcheck for vendor update. Error: $errM" + set installation_event "" + } + foreach {relpath module} $local_modules { + set module [string trim $module :] + set module_subpath [string map {:: /} [namespace qualifiers $module]] + set srclocation [file join $projectroot $relpath $module_subpath] + #puts stdout "$relpath $module $module_subpath $srclocation" + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 + if {![llength $pkgmatches]} { + puts stderr "Missing source for vendor module $module - not found in $srclocation" + continue + } + set latestfile [lindex $pkgmatches 0] + set latestver [lindex [split [file rootname $latestfile] -] 1] + foreach m $pkgmatches { + lassign [split [file rootname $m] -] _pkg ver + #puts "comparing $ver vs $latestver" + if {[package vcompare $ver $latestver] == 1} { + set latestver $ver + set latestfile $m + } + } + set srcfile [file join $srclocation $latestfile] + set tgtfile [file join $targetroot $module_subpath $latestfile] + if {$installation_event ne ""} { + #---------- + $installation_event targetset_init INSTALL $tgtfile + $installation_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$installation_event targetset_source_changes] changed]]\ + || [llength [$installation_event get_targets_exist]] < [llength [$installation_event get_targets]]\ + } { + file mkdir [file dirname $tgtfile] ;#ensure containing folder for target exists + $installation_event targetset_started + # -- --- --- --- --- --- + puts "VENDOR update: $srcfile -> $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $installation_event targetset_end FAILED + } else { + $installation_event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts -nonewline stderr "." + $installation_event targetset_end SKIPPED + } + $installation_event end + } else { + file copy -force $srcfile $tgtfile + } + } + + } + } else { + puts stderr "No config at $vendor_config - nothing configured to update" + } + } + + + puts stdout " vendor package update done " + flush stderr + flush stdout + ::exit 0 +} + +if {$::punkmake::command eq "bootsupport"} { + puts "projectroot: $projectroot" + puts "script: [info script]" + #puts "-- [tcl::tm::list] --" + puts stdout "Updating bootsupport from local files" + + proc bootsupport_localupdate {projectroot} { + set bootsupport_modules [list] + set bootsupport_module_folders [list] + set bootsupport_config $projectroot/src/bootsupport/include_modules.config ;# + if {[file exists $bootsupport_config]} { + set targetroot $projectroot/src/bootsupport/modules + source $bootsupport_config ;#populate $bootsupport_modules with project-specific list + if {![llength $bootsupport_modules]} { + puts stderr "No local bootsupport modules configured for updating" + } else { + + if {[catch { + #---------- + set boot_installer [punkcheck::installtrack new make.tcl $projectroot/src/bootsupport/.punkcheck] + $boot_installer set_source_target $projectroot $projectroot/src/bootsupport + set boot_event [$boot_installer start_event {-make_step bootsupport}] + #---------- + } errM]} { + puts stderr "Unable to use punkcheck for bootsupport. Error: $errM" + set boot_event "" + } + + foreach {relpath module} $bootsupport_modules { + set module [string trim $module :] + set module_subpath [string map {:: /} [namespace qualifiers $module]] + set srclocation [file join $projectroot $relpath $module_subpath] + #puts stdout "$relpath $module $module_subpath $srclocation" + set pkgmatches [glob -nocomplain -dir $srclocation -tail [namespace tail $module]-*] + #lsort won't sort version numbers properly e.g with -dictionary 0.1.1 comes before 0.1 + if {![llength $pkgmatches]} { + puts stderr "Missing source for bootsupport module $module - not found in $srclocation" + continue + } + set latestfile [lindex $pkgmatches 0] + set latestver [lindex [split [file rootname $latestfile] -] 1] + foreach m $pkgmatches { + lassign [split [file rootname $m] -] _pkg ver + #puts "comparing $ver vs $latestver" + if {[package vcompare $ver $latestver] == 1} { + set latestver $ver + set latestfile $m + } + } + set srcfile [file join $srclocation $latestfile] + set tgtfile [file join $targetroot $module_subpath $latestfile] + if {$boot_event ne ""} { + #---------- + $boot_event targetset_init INSTALL $tgtfile + $boot_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$boot_event targetset_source_changes] changed]]\ + || [llength [$boot_event get_targets_exist]] < [llength [$boot_event get_targets]]\ + } { + file mkdir [file dirname $tgtfile] ;#ensure containing folder for target exists + $boot_event targetset_started + # -- --- --- --- --- --- + puts "BOOTSUPPORT update: $srcfile -> $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $boot_event targetset_end FAILED + } else { + $boot_event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts -nonewline stderr "." + $boot_event targetset_end SKIPPED + } + $boot_event end + } else { + file copy -force $srcfile $tgtfile + } + } + if {$boot_event ne ""} { + puts \n + $boot_event destroy + $boot_installer destroy + } + } + + if {[llength $bootsupport_module_folders] % 2 != 0} { + #todo - change include_modules.config structure to be line based? we have no way of verifying paired entries because we accept a flat list + puts stderr "WARNING - Skipping bootsupport_module_folders - list should be a list of base subpath pairs" + } else { + foreach {base subfolder} $bootsupport_module_folders { + #user should be careful not to include recursive/cyclic structures e.g module that has a folder which contains other modules from this project + #It will probably work somewhat.. but may make updates confusing.. or worse - start making deeper and deeper copies + set src [file join $projectroot $base $subfolder] + if {![file isdirectory $src]} { + puts stderr "bootsupport folder not found: $src" + continue + } + + #subfolder is the common relative path - so don't include the base in the target path + set tgt [file join $targetroot $subfolder] + file mkdir $tgt + + puts stdout "BOOTSUPPORT non_tm_files $src - copying to $tgt (if source file changed)" + set overwrite "installedsourcechanged-targets" + set resultdict [punkcheck::install_non_tm_files $src $tgt -installer make.tcl -overwrite $overwrite -punkcheck_folder $projectroot/src/bootsupport] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + } + } + + } + } + + bootsupport_localupdate $projectroot + + #/modules/punk/mix/templates/layouts only applies if the project has it's own copy of the punk/mix modules. Generally this should only apply to the punkshell project itself. + set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + foreach project_layout_base $layout_bases { + if {[file exists $project_layout_base]} { + set project_layouts [glob -nocomplain -dir $project_layout_base -type d -tail *] + foreach layoutname $project_layouts { + #don't auto-create src/bootsupport - just update it if it exists + if {[file exists [file join $project_layout_base $layoutname/src/bootsupport]]} { + set antipaths [list\ + README.md\ + ] + set sourcemodules $projectroot/src/bootsupport/modules + set targetroot [file join $project_layout_base $layoutname/src/bootsupport/modules] + file mkdir $targetroot + + puts stdout "BOOTSUPPORT layouts/$layoutname: copying from $sourcemodules to $targetroot (if source file changed)" + set resultdict [punkcheck::install $sourcemodules $targetroot -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + flush stdout + } + } + } else { + puts stderr "No layout base at $project_layout_base" + } + } + puts stdout " bootsupport done " + flush stderr + flush stdout + #punk86 can hang if calling make.tcl via 'run' without this 'after' delay. punk87 unaffected. cause unknown. + #after 500 + ::exit 0 +} + + + +if {$::punkmake::command ne "project"} { + puts stderr "Command $::punkmake::command not implemented - aborting." + flush stderr + after 100 + exit 1 +} + +file mkdir $projectroot/lib ;#needs to exist + +#only a single consolidated /modules folder used for target +set target_modules_base $projectroot/modules +file mkdir $target_modules_base + + +#external libs and modules first - and any supporting files - no 'building' required +if {[file exists $sourcefolder/vendorlib]} { + #exclude README.md from source folder - but only the root one + #-antiglob_paths takes relative patterns e.g + # */test.txt will only match test.txt exactly one level deep. + # */*/*.foo will match any path ending in .foo that is exactly 2 levels deep. + # **/test.txt will match at any level below the root (but not in the root) + set antipaths [list\ + README.md\ + ] + + puts stdout "VENDORLIB: copying from $sourcefolder/vendorlib to $projectroot/lib (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/vendorlib $projectroot/lib -overwrite installedsourcechanged-targets -antiglob_paths $antipaths] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] + +} else { + puts stderr "VENDORLIB: No src/vendorlib folder found." +} + +if {[file exists $sourcefolder/vendormodules]} { + #install .tm *and other files* + puts stdout "VENDORMODULES: copying from $sourcefolder/vendormodules to $target_modules_base (if source file changed)" + set resultdict [punkcheck::install $sourcefolder/vendormodules $target_modules_base -installer make.tcl -overwrite installedsourcechanged-targets -antiglob_paths {README.md include_modules.config}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] +} else { + puts stderr "VENDORMODULES: No src/vendormodules folder found." +} + +######################################################## +#templates +#e.g The default project layout is mainly folder structure and readme files - but has some scripts developed under the main src that we want to sync +#src to src/modules/punk/mix/templates/layouts/project/src + +set old_layout_update_list [list\ + [list project $sourcefolder/modules/punk/mix/templates]\ + [list basic $sourcefolder/mixtemplates]\ + ] +set layout_bases [list\ + $sourcefolder/project_layouts/custom/_project\ + ] + +foreach layoutbase $layout_bases { + if {![file exists $layoutbase]} { + continue + } + set project_layouts [glob -nocomplain -dir $layoutbase -type d -tail *] + foreach layoutname $project_layouts { + set config [dict create\ + -make-step sync_layouts\ + ] + #---------- + set tpl_installer [punkcheck::installtrack new make.tcl $layoutbase/.punkcheck] + $tpl_installer set_source_target $sourcefolder $layoutbase + set tpl_event [$tpl_installer start_event $config] + #---------- + set pairs [list] + set pairs [list\ + [list $sourcefolder/build.tcl $layoutbase/$layoutname/src/build.tcl]\ + [list $sourcefolder/make.tcl $layoutbase/$layoutname/src/make.tcl]\ + ] + + foreach filepair $pairs { + lassign $filepair srcfile tgtfile + + file mkdir [file dirname $tgtfile] + #---------- + $tpl_event targetset_init INSTALL $tgtfile + $tpl_event targetset_addsource $srcfile + #---------- + if {\ + [llength [dict get [$tpl_event targetset_source_changes] changed]]\ + || [llength [$tpl_event get_targets_exist]] < [llength [$tpl_event get_targets]]\ + } { + $tpl_event targetset_started + # -- --- --- --- --- --- + puts stdout "PROJECT LAYOUT update - layoutname: $layoutname Copying from $srcfile to $tgtfile" + if {[catch { + file copy -force $srcfile $tgtfile + } errM]} { + $tpl_event targetset_end FAILED -note "layout:$layoutname copy failed with err: $errM" + } else { + $tpl_event targetset_end OK -note "layout:$layoutname" + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $tpl_event targetset_end SKIPPED + } + } + + $tpl_event end + $tpl_event destroy + $tpl_installer destroy + } +} +######################################################## + + +#default source module folder is at projectroot/src/modules +#There may be multiple other src module folders at same level (e.g folder not being other special-purpose folder and not matching name vendor* that contains at least one .tm file in its root) +set source_module_folderlist [punk::mix::cli::lib::find_source_module_paths $projectroot] +foreach src_module_dir $source_module_folderlist { + puts stderr "Processing source module dir: $src_module_dir" + set dirtail [file tail $src_module_dir] + #modules and associated files belonging to this package/app + set copied [punk::mix::cli::lib::build_modules_from_source_to_base $src_module_dir $target_modules_base -glob *.tm] ;#will only accept a glob ending in .tm + #set copied [list] + puts stdout "--------------------------" + puts stderr "Copied [llength $copied] tm modules from src/$dirtail to $target_modules_base " + puts stdout "--------------------------" + + set overwrite "installedsourcechanged-targets" + #set overwrite "ALL-TARGETS" + puts stdout "MODULEFOLDER non_tm_files $src_module_dir - copying to $target_modules_base (if source file changed)" + set resultdict [punkcheck::install_non_tm_files $src_module_dir $target_modules_base -installer make.tcl -overwrite $overwrite -antiglob_paths {README.md}] + puts stdout [punkcheck::summarize_install_resultdict $resultdict] +} + +set installername "make.tcl" + +# ---------------------------------------- +if {[punk::repo::is_fossil_root $projectroot]} { + set config [dict create\ + -make-step configure_fossil\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $projectroot/.punkcheck] + $installer set_source_target $projectroot $projectroot + + set event [$installer start_event $config] + $event targetset_init VIRTUAL fossil_settings_mainmenu ;#VIRTUAL - since there is no actual target file + set menufile $projectroot/.fossil-custom/mainmenu + $event targetset_addsource $menufile + #---------- + + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Configuring fossil setting: mainmenu from: $menufile" + if {[catch { + set fd [open $menufile r] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + exec fossil settings mainmenu $data + } errM]} { + $event targetset_end FAILED -note "fossil update failed: $errM" + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + $event destroy + $installer destroy +} + +set buildfolder [punk::mix::cli::lib::get_build_workdir $sourcefolder] +if {$buildfolder ne "$sourcefolder/_build"} { + puts stderr "$sourcefolder/_build doesn't match the project buildfolder $buildfolder - check project filestructure" + puts stdout " -aborted- " + exit 2 +} + + +#find runtimes +set rtfolder $sourcefolder/runtime +set runtimes [glob -nocomplain -dir $rtfolder -types {f x} -tail *] +if {![llength $runtimes]} { + puts stderr "No executable runtimes found in $rtfolder - unable to build any .vfs folders into executables." + puts stderr "Add runtimes to $sourcefolder/runtime if required" + exit 0 +} + +if {[catch {exec sdx help} errM]} { + puts stderr "FAILED to find usable sdx command - check that sdx executable is on path" + puts stderr "err: $errM" + exit 1 +} + +# -- --- --- --- --- --- --- --- --- --- +#load mapvfs.config file (if any) in runtime folder to map runtimes to vfs folders. +#build a dict keyed on runtime executable name. +#If no mapfile (or no mapfile entry for that runtime) - the runtime will be paired with a matching .vfs folder in src folder. e.g punk.exe to src/punk.vfs +#If vfs folders or runtime executables which are explicitly listed in the mapfile don't exist - warn on stderr - but continue. if such nonexistants found; prompt user for whether to continue or abort. +set mapfile $rtfolder/mapvfs.config +set runtime_vfs_map [dict create] +set vfs_runtime_map [dict create] +if {[file exists $mapfile]} { + set fdmap [open $mapfile r] + fconfigure $fdmap -translation binary + set mapdata [read $fdmap] + close $fdmap + set mapdata [string map {\r\n \n} $mapdata] + set missing [list] + foreach ln [split $mapdata \n] { + set ln [string trim $ln] + if {$ln eq "" || [string match #* $ln]} { + continue + } + set vfspaths [lassign $ln runtime] + if {[string match *.exe $runtime]} { + #.exe is superfluous but allowed + #drop windows .exe suffix so same config can work cross platform - extension will be re-added if necessary later + set runtime [string range $runtime 0 end-4] + } + if {$runtime ne "-"} { + set runtime_test $runtime + if {"windows" eq $::tcl_platform(platform)} { + set runtime_test $runtime.exe + } + if {![file exists [file join $rtfolder $runtime_test]]} { + puts stderr "WARNING: Missing runtime file $rtfolder/$runtime_test (line in mapvfs.config: $ln)" + lappend missing $runtime + } + } + foreach vfs $vfspaths { + if {![file isdirectory [file join $sourcefolder $vfs]]} { + puts stderr "WARNNING: Missing vfs folder [file join $sourcefolder $vfs] specified in mapvfs.config for runtime $runtime" + lappend missing $vfs + } + dict lappend vfs_runtime_map $vfs $runtime + } + if {[dict exists $runtime_vfs_map $runtime]} { + puts stderr "CONFIG FILE ERROR. runtime: $runtime was specified more than once in $mapfile." + exit 3 + } + dict set runtime_vfs_map $runtime $vfspaths + } + if {[llength $missing]} { + puts stderr "WARNING [llength $missing] missing items from $mapfile. (TODO - prompt user to continue/abort)" + foreach m $missing { + puts stderr " $m" + } + puts stderr "continuing..." + } +} +# -- --- --- --- --- --- --- --- --- --- + +set vfs_folders [glob -nocomplain -dir $sourcefolder -types d -tail *.vfs] +#add any extra .vfs folders found in runtime/mapvfs.config file (e.g myotherruntimes/something.vfs) +dict for {vfs -} $vfs_runtime_map { + if {$vfs ni $vfs_folders} { + lappend vfs_folders $vfs + } +} +if {![llength $vfs_folders]} { + puts stdout "No .vfs folders found at '$sourcefolder' - no kits to build" + puts stdout " -done- " + exit 0 +} + +set vfs_folder_changes [dict create] ;#cache whether each .vfs folder has changes so we don't re-run tests if building from same .vfs with multiple runtime executables + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +#set runtimefile [lindex $runtimes 0] +foreach runtimefile $runtimes { + #runtimefile e.g tclkit86bi.exe on windows tclkit86bi on other platforms + + #sdx *may* be pointed to use the runtime we use to build the kit, or the user may manually use this runtime if they don't have tclsh + #sdx will complain if the same runtime is used for the shell as is used in the -runtime argument - so we make a copy (REVIEW) + #if {![file exists $buildfolder/buildruntime.exe]} { + # file copy $rtfolder/$runtimefile $buildfolder/buildruntime.exe + #} + + set basedir $buildfolder + set config [dict create\ + -make-step copy_runtime\ + ] + #---------- + set installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $installer set_source_target $rtfolder $buildfolder + set event [$installer start_event $config] + $event targetset_init INSTALL $buildfolder/build_$runtimefile + $event targetset_addsource $rtfolder/$runtimefile + #---------- + + #set changed_unchanged [punkcheck::recordlist::file_install_record_source_changes [lindex [dict get $file_record body] end]] + if {\ + [llength [dict get [$event targetset_source_changes] changed]]\ + || [llength [$event get_targets_exist]] < [llength [$event get_targets]]\ + } { + $event targetset_started + # -- --- --- --- --- --- + puts stdout "Copying runtime from $rtfolder/$runtimefile to $buildfolder/build_$runtimefile" + if {[catch { + file copy -force $rtfolder/$runtimefile $buildfolder/build_$runtimefile + } errM]} { + $event targetset_end FAILED + } else { + $event targetset_end OK + } + # -- --- --- --- --- --- + } else { + puts stderr "." + $event targetset_end SKIPPED + } + $event end + +} + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + +# +# loop over vfs_folders and for each one, loop over configured (or matching) runtimes - build with sdx if source .vfs or source runtime exe has changed. +# we are using punkcheck to install result to buildfolder so we create a .punkcheck file at the target folder to store metadata. +# punkcheck allows us to not rely purely on timestamps (which may be unreliable) +# +set startdir [pwd] +puts stdout "Found [llength $vfs_folders] .vfs folders - building executable for each..." +cd [file dirname $buildfolder] +#root folder mtime is insufficient for change detection. Tree mtime of folders only is a barely passable mechanism for vfs change detection in some circumstances - e.g if files added/removed but never edited in place +#a hash of full tree file & dir mtime may be more reasonable - but it remains to be seen if just tar & checksum is any/much slower. +#Simply rebuilding all the time may be close the speed of detecting change anyway - and almost certainly much faster when there is a change. +#Using first mtime encountered that is later than target is another option - but likely to be highly variable in speed. Last file in the tree could happen to be the latest, and this mechanism doesn't handle build on reversion to older source. +set exe_names_seen [list] +foreach vfs $vfs_folders { + + set vfsname [file rootname $vfs] + puts stdout " Processing vfs $sourcefolder/$vfs" + puts stdout " ------------------------------------" + set skipped_vfs_build 0 + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + set basedir $buildfolder + set config [dict create\ + -make-step build_vfs\ + ] + + set runtimes [list] + if {[dict exists $vfs_runtime_map $vfs]} { + set runtimes [dict get $vfs_runtime_map $vfs] ;#map dict is unsuffixed (.exe stripped or was not present) + if {"windows" eq $::tcl_platform(platform)} { + set runtimes_raw $runtimes + set runtimes [list] + foreach rt $runtimes_raw { + if {![string match *.exe $rt] && $rt ne "-"} { + set rt $rt.exe + } + lappend runtimes $rt + } + } + } else { + #only match this vfs to a correspondingly named runtime if there was no explicit entry for that runtime + set matchrt [file rootname [file tail $vfs]] ;#e.g project.vfs -> project + if {![dict exists $runtime_vfs_map $matchrt]} { + if {"windows" eq $::tcl_platform(platform)} { + if {[file exists $rtfolder/$matchrt.exe]} { + lappend runtimes $matchrt.exe + } + } else { + lappend runtimes $matchrt + } + } + } + #assert $runtimes is a list of executable names suffixed with .exe if on windows - whether or not specified with .exe in the mapvfs.config + + + #todo - non kit based - zipkit? + # $runtimes may now include a dash entry "-" (from mapvfs.config file) + foreach rtname $runtimes { + #rtname of "-" indicates build a kit without a runtime + + #first configured runtime will be the one to use the same name as .vfs folder for output executable. Additional runtimes on this .vfs will need to suffix the runtime name to disambiguate. + #review: This mechanism may not be great for multiplatform builds ? We may be better off consistently combining vfsname and rtname and letting a later platform-specific step choose ones to install in bin with simpler names. + if {$rtname eq "-"} { + set targetkit $vfsname.kit + } else { + if {$::tcl_platform(platform) eq "windows"} { + set targetkit ${vfsname}.exe + } else { + set targetkit $vfsname + } + if {$targetkit in $exe_names_seen} { + #more than one runtime for this .vfs + set targetkit ${vfsname}_$rtname + } + } + lappend exe_names_seen $targetkit + # -- ---------- + set vfs_installer [punkcheck::installtrack new $installername $basedir/.punkcheck] + $vfs_installer set_source_target $sourcefolder $buildfolder + set vfs_event [$vfs_installer start_event {-make-step build_vfs}] + $vfs_event targetset_init INSTALL $buildfolder/$targetkit + $vfs_event targetset_addsource $sourcefolder/$vfs + if {$rtname ne "-"} { + $vfs_event targetset_addsource $buildfolder/build_$rtname + } + # -- ---------- + + set changed_unchanged [$vfs_event targetset_source_changes] + + if {[llength [dict get $changed_unchanged changed]] || [llength [$vfs_event get_targets_exist]] < [llength [$vfs_event get_targets]]} { + #source .vfs folder has changes + $vfs_event targetset_started + # -- --- --- --- --- --- + + #use + if {[file exists $buildfolder/$vfsname.new]} { + puts stderr "deleting existing $buildfolder/$vfsname.new" + file delete $buildfolder/$vfsname.new + } + + puts stdout "building $vfsname with sdx.. vfsdir:$vfs cwd: [pwd]" + + + if {[catch { + if {$rtname ne "-"} { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose + } else { + exec sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose + } + } result]} { + if {$rtname ne "-"} { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -runtime $buildfolder/build_$rtname -verbose failed with msg: $result" + } else { + puts stderr "sdx wrap $buildfolder/$vfsname.new -vfs $sourcefolder/$vfs -verbose failed with msg: $result" + } + } else { + puts stdout "ok - finished sdx" + set separator [string repeat = 40] + puts stdout $separator + puts stdout $result + puts stdout $separator + } + + if {![file exists $buildfolder/$vfsname.new]} { + puts stderr "|err> make.tcl build didn't seem to produce output at $sourcefolder/_build/$vfsname.new" + $vfs_event targetset_end FAILED + exit 2 + } + + # -- --- --- + if {$::tcl_platform(platform) eq "windows"} { + set pscmd "tasklist" + } else { + set pscmd "ps" + } + + #killing process doesn't apply to .kit build + if {$rtname ne "-"} { + if {![catch { + exec $pscmd | grep $vfsname + } still_running]} { + + puts stdout "found $vfsname instances still running\n" + set count_killed 0 + foreach ln [split $still_running \n] { + puts stdout " $ln" + + if {$::tcl_platform(platform) eq "windows"} { + set pid [lindex $ln 1] + if {$forcekill} { + set killcmd [list taskkill /F /PID $pid] + } else { + set killcmd [list taskkill /PID $pid] + } + } else { + set pid [lindex $ln 0] + #review! + if {$forcekill} { + set killcmd [list kill -9 $pid] + } else { + set killcmd [list kill $pid] + } + } + puts stdout " pid: $pid (attempting to kill now using '$killcmd')" + if {[catch { + exec {*}$killcmd + } errMsg]} { + puts stderr "$killcmd returned an error:" + puts stderr $errMsg + if {!$forcekill} { + puts stderr "(try '[info script] -k' option to force kill)" + } + #avoid exiting if the kill failure was because the task has already exited + #review - *no running instance* works with windows taskkill - "*No such process*" works with kill -9 on FreeBSD and linux - other platforms? + if {![string match "*no running instance*" $errMsg] && ![string match "*No such process*" $errMsg]} { + exit 4 + } + } else { + puts stderr "$killcmd ran without error" + incr count_killed + } + } + if {$count_killed > 0} { + puts stderr "\nKilled $count_killed processes. Waiting a short time before attempting to delete executable" + after 1000 + } + } else { + puts stderr "Ok.. no running '$vfsname' processes found" + } + } + + if {[file exists $buildfolder/$targetkit]} { + puts stderr "deleting existing $buildfolder/$targetkit" + if {[catch { + file delete $buildfolder/$targetkit + } msg]} { + puts stderr "Failed to delete $buildfolder/$targetkit" + exit 4 + } + } + #WINDOWS filesystem 'tunneling' (file replacement within 15secs) could cause targetkit to copy ctime & shortname metadata from previous file! + #This is probably harmless - but worth being aware of. + file rename $buildfolder/$vfsname.new $buildfolder/$targetkit + # -- --- --- --- --- --- + $vfs_event targetset_end OK + + + after 200 + set deployment_folder [file dirname $sourcefolder]/bin + file mkdir $deployment_folder + + # -- ---------- + set bin_installer [punkcheck::installtrack new "make.tcl" $deployment_folder/.punkcheck] + $bin_installer set_source_target $buildfolder $deployment_folder + set bin_event [$bin_installer start_event {-make-step final_kit_install}] + $bin_event targetset_init INSTALL $deployment_folder/$targetkit + #todo - move final deployment step outside of the build vfs loop? (final deployment can fail and then isn't rerun even though _build and deployed versions differ, unless .vfs modified again) + #set last_completion [$bin_event targetset_last_complete] + + $bin_event targetset_addsource $buildfolder/$targetkit + $bin_event targetset_started + # -- ---------- + + + set delete_failed 0 + if {[file exists $deployment_folder/$targetkit]} { + puts stderr "deleting existing deployed at $deployment_folder/$targetkit" + if {[catch { + file delete $deployment_folder/$targetkit + } errMsg]} { + puts stderr "deletion of deployed version at $deployment_folder/$targetkit failed: $errMsg" + set delete_failed 1 + } + } + if {!$delete_failed} { + puts stdout "copying.." + puts stdout "$buildfolder/$targetkit" + puts stdout "to:" + puts stdout "$deployment_folder/$targetkit" + after 300 + file copy $buildfolder/$targetkit $deployment_folder/$targetkit + # -- ---------- + $bin_event targetset_end OK + # -- ---------- + } else { + $bin_event targetset_end FAILED -note "could not delete" + exit 5 + } + $bin_event destroy + $bin_installer destroy + + } else { + set skipped_vfs_build 1 + puts stderr "." + puts stdout "Skipping build for vfs $vfs with runtime $rtname - no change detected" + $vfs_event targetset_end SKIPPED + } + $vfs_event destroy + $vfs_installer destroy + } ;#end foreach rtname in runtimes + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- +} +cd $startdir + +puts stdout "done" +exit 0 + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/modpod-loadscript.tcl b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/modpod-loadscript.tcl new file mode 100644 index 00000000..99320f87 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/modpod-loadscript.tcl @@ -0,0 +1,53 @@ +apply {code { + set scriptpath [file normalize [info script]] + if {[string match "#modpod-loadscript*.tcl" [file tail $scriptpath]]} { + #jump up an extra dir level if we are within a #modpod-loadscript file. + set mypath [file dirname [file dirname $scriptpath]] + #expect to be in folder #modpod-- + #Now we need to test if we are in a mounted folder vs an extracted folder + set container [file dirname $mypath] + if {[string match "#mounted-modpod-*" $container]} { + set mypath [file dirname $container] + } + set modver [string range [file tail [file dirname $scriptpath]] 8 end] ;# the containing folder is named #modpod-- + } else { + set mypath [file dirname $scriptpath] + set modver [file root [file tail [info script]]] + } + set mysegs [file split $mypath] + set overhang [list] + foreach libpath [tcl::tm::list] { + set libsegs [file split $libpath] ;#split and rejoin with '/' because sometimes module paths may have mixed \ & / + if {[file join $mysegs /] eq [file join [lrange $libsegs 0 [llength $mysegs]] /]} { + #mypath is below libpath + set overhang [lrange $mysegs [llength $libsegs]+1 end] + break + } + } + lassign [split $modver -] moduletail version + set ns [join [concat $overhang $moduletail] ::] + #if {![catch {package require modpod}]} { + # ::modpod::disconnect [info script] + #} + package provide $ns $version + namespace eval $ns $code +} ::} { + # + # Module procs here, where current namespace is that of the module. + # Package version can, if needed, be accessed as [uplevel 1 {set version}] + # Last element of module name: [uplevel 1 {set moduletail}] + # Full module name: [uplevel 1 {set ns}] + + # + # + # + + # + # + # + + # + # + # + +} diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/z b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/z new file mode 100644 index 00000000..a8f7b05a --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/modpod-module-version/z @@ -0,0 +1,2 @@ +#Do not remove the trailing ctrl-z character from this file + \ No newline at end of file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/test.zip b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/test.zip new file mode 100644 index 00000000..665234de Binary files /dev/null and b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modpod/template_modpod-0.0.1/test.zip differ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_buildversion.txt b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_buildversion.txt new file mode 100644 index 00000000..6266c016 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_buildversion.txt @@ -0,0 +1,3 @@ +%Major.Minor.Level% +#First line must be a semantic version number +#all other lines are ignored. diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_description.txt b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_description.txt new file mode 100644 index 00000000..571e4cf5 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/modules/modulename_description.txt @@ -0,0 +1,10 @@ +Identifier: %package% +Version: %version% +Title: %title% +Creator: %name% <%email%> +Description: %description% +Rights: BSD +URL: %url% +Available: +Architecture: tcl +Subject: diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/a b/tcltest.bat b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/a b/tcltest.bat new file mode 100644 index 00000000..b75201df --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/a b/tcltest.bat @@ -0,0 +1,7 @@ +::lindex tcl;#\ +@call tclsh "%~dp0%~n0.bat" %* & goto :eof +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl +puts stdout "script: [info script]" +puts stdout "argv: $::argc" +puts stdout "args: '$::argv'" + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.cmd new file mode 100644 index 00000000..9d903392 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.cmd @@ -0,0 +1,264 @@ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide s {proc $s args {}}; Hide :;rename set s2;Hide set;s2 1 list]"; set -- : "$@"; $1 = @' +: heredoc1 - hide from powershell (close sqote for unix shells) ' \ +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo]#not necessary but can help avoid errs in testing" +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +@REM { +@REM DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required and will reduce functionality. +@REM Even comment lines can be part of the functionality of this script - modify with care. +@REM Change the value of nextshell in the next line if desired, and code within payload sections as appropriate. +@SET "nextshell=pwsh" +@REM nextshell set to pwsh,sh,bash or tclsh +@REM @ECHO nextshell is %nextshell% +@SET "validshells=pwsh,sh,bash,tclsh" +@CALL SET keyRemoved=%%validshells:%nextshell%=%% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix) +@REM -- This section intended only to launch the next shell +@REM -- Avoid customising this if possible. cmd/batch script is probably the least expressive language. +@REM -- custom windows payloads should be in powershell,tclsh or sh/bash code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@IF %nextshell%==pwsh ( + CALL pwsh -nop -c set-executionpolicy -Scope CurrentUser RemoteSigned + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL + REM test availability of preferred option of powershell7+ pwsh + CALL pwsh -nop -nol -c write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + IF !pwshtest_exitcode!==0 CALL pwsh -nop -nol "%~dp0%~n0.ps1" %* & SET task_exitcode=!errorlevel! + REM fallback to powershell if pwsh failed + IF NOT !pwshtest_exitcode!==0 ( + REM CALL powershell -nop -nol -c write-host powershell-found + CALL powershell -nop -nol -file "%~dp0%~n0.ps1" %* + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF %nextshell%==bash ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + CALL %nextshell% "!wslpath!%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + CALL %nextshell% "%~dp0%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value %nextshell% valid options are %validshells% + SET task_exitcode=66 + GOTO :exit + ) + ) +) +@GOTO :endlib +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@GOTO :eof +:endlib + +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit +@GOTO :exit +# } +# rem call %nextshell% "%~dp0%~n0.cmd" %* +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s2 set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup +Hide :exit;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +# +# -- --- --- --- --- --- --- --- + + +# +# + +# unbal } + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: cmd exit label - return exitcode +:exit +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.ps1 b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.ps1 new file mode 100644 index 00000000..c2905c97 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/multishell.ps1 @@ -0,0 +1,256 @@ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide s {proc $s args {}}; Hide :;rename set s2;Hide set;s2 1 list]"; set -- : "$@"; $1 = @' +: heredoc1 - hide from powershell (close sqote for unix shells) ' \ +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo]#not necessary but can help avoid errs in testing" +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +@REM { +@REM DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required and will reduce functionality. +@REM Even comment lines can be part of the functionality of this script - modify with care. +@REM Change the value of nextshell in the next line if desired, and code within payload sections as appropriate. +@SET "nextshell=pwsh" +@REM nextshell set to pwsh,sh,bash or tclsh +@REM @ECHO nextshell is %nextshell% +@SET "validshells=pwsh,sh,bash,tclsh" +@CALL SET keyRemoved=%%validshells:%nextshell%=%% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix) +@REM -- This section intended only to launch the next shell +@REM -- Avoid customising this if possible. cmd/batch script is probably the least expressive language. +@REM -- custom windows payloads should be in powershell,tclsh or sh/bash code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@IF %nextshell%==pwsh ( + CALL pwsh -nop -c set-executionpolicy -Scope CurrentUser RemoteSigned + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL + REM test availability of preferred option of powershell7+ pwsh + CALL pwsh -nop -nol -c write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + IF !pwshtest_exitcode!==0 CALL pwsh -nop -nol "%~dp0%~n0.ps1" %* & SET task_exitcode=!errorlevel! + REM fallback to powershell if pwsh failed + IF NOT !pwshtest_exitcode!==0 ( + REM CALL powershell -nop -nol -c write-host powershell-found + CALL powershell -nop -nol -file "%~dp0%~n0.ps1" %* + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF %nextshell%==bash ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + CALL %nextshell% "!wslpath!%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + CALL %nextshell% "%~dp0%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value %nextshell% valid options are %validshells% + SET task_exitcode=66 + GOTO :exit + ) + ) +) +@GOTO :endlib +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@GOTO :eof +:endlib + +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit +@GOTO :exit +# } +# rem call %nextshell% "%~dp0%~n0.cmd" %* +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s2 set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup +Hide :exit;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + + +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# + +# -- --- --- --- --- --- --- --- +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# -- --- --- --- --- --- --- --- + +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# + + +# -- --- --- --- --- --- --- --- +tclsh $scriptname $args +# -- --- --- --- --- --- --- --- + + +# + +# unbal } + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: cmd exit label - return exitcode +:exit +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/multishell-old.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/multishell-old.cmd new file mode 100644 index 00000000..1cb9e0ef --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/multishell-old.cmd @@ -0,0 +1,270 @@ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide s {proc $s args {}}; Hide :;rename set s2;Hide set;s2 1 list]"; set -- : "$@"; $1 = @' +: heredoc1 - hide from powershell (close sqote for unix shells) ' \ +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo]#not necessary but can help avoid errs in testing" +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +@REM { +@REM DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required and will reduce functionality. +@REM Even comment lines can be part of the functionality of this script - modify with care. +@REM Change the value of nextshell in the next line if desired, and code within payload sections as appropriate. +@SET "nextshell=pwsh" +@REM nextshell set to pwsh,sh,bash or tclsh +@REM @ECHO nextshell is %nextshell% +@SET "validshells=pwsh,sh,bash,tclsh" +@CALL SET keyRemoved=%%validshells:%nextshell%=%% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix) +@REM -- This section intended only to launch the next shell +@REM -- Avoid customising this if possible. cmd/batch script is probably the least expressive language. +@REM -- custom windows payloads should be in powershell,tclsh or sh/bash code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@IF %nextshell%==pwsh ( + CALL pwsh -nop -c set-executionpolicy -Scope CurrentUser RemoteSigned + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL + REM test availability of preferred option of powershell7+ pwsh + CALL pwsh -nop -nol -c write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + IF !pwshtest_exitcode!==0 CALL pwsh -nop -nol "%~dp0%~n0.ps1" %* & SET task_exitcode=!errorlevel! + REM fallback to powershell if pwsh failed + IF NOT !pwshtest_exitcode!==0 ( + REM CALL powershell -nop -nol -c write-host powershell-found + CALL powershell -nop -nol -file "%~dp0%~n0.ps1" %* + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF %nextshell%==bash ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + CALL %nextshell% "!wslpath!%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + CALL %nextshell% "%~dp0%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value %nextshell% valid options are %validshells% + SET task_exitcode=66 + GOTO :exit + ) + ) +) +@GOTO :endlib +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@GOTO :eof +:endlib + +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit +@GOTO :exit +# } +# rem call %nextshell% "%~dp0%~n0.cmd" %* +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s2 set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup +Hide :exit;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +# +# -- --- --- --- --- --- --- --- + + +# +# + +# unbal } + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: cmd exit label - return exitcode +:exit +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/shellbat.bat b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/shellbat.bat new file mode 100644 index 00000000..aa9039a9 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/basic/shellbat.bat @@ -0,0 +1,112 @@ +: "[proc : args {}]" ;# *tcl shellbat - call with sh,bash,tclsh on any platform, or with cmd on windows. +: <<'HIDE_FROM_BASH_AND_SH' +: ;# leading colon hides from .bat, trailing slash hides next line from tcl \ +@call tclsh "%~dp0%~n0.bat" %* +: ;#\ +@set taskexitcode=%errorlevel% & goto :exit +# -*- tcl -*- +# ################################################################################################# +# This is a tcl shellbat file +# It is tuned to run when called as a batch file, a tcl script, an sh script or a bash script, +# so the specific layout and characters used are quite sensitive to change. +# It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# e.g ./filename.sh.bat in sh or bash or powershell +# e.g filename.sh or filename.sh.bat at windows command prompt +# e.g tclsh filename.sh.bat | sh filename.sh.bat | bash filename.sh.bat +# In all cases an arbitrary number of arguments are accepted +# To avoid the initial commandline on stdout when calling as a batch file on windows, use: +# cmd /Q /c filename.sh.bat +# (because we cannot use @if to silence it, as this isn't understood by tcl,sh or bash) +# ################################################################################################# +#fconfigure stdout -translation crlf +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" + + +# +# + +# --- --- --- --- --- --- --- --- --- --- --- --- --- +# only exit if needed. see exitcode notes at bottom of file and exit there for consistency across invocation methods +# --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +#-- +#-- bash/sh code follows. +#-- protect from tcl using line continuation char on the previous comment for each line, like so: \ +printf "etc" +#-- or alternatively place sh/bash script within the false==false block +#-- whilst being careful to balance braces {} +#-- For more complex needs you should call out to external scripts +#-- +#-- END marker for hide_from_bash_and_sh\ +HIDE_FROM_BASH_AND_SH + +#--------------------------------------------------------- +#-- This if statement hides(mostly) a sh/bash code block from Tcl +if false==false # else { +then +: +#--------------------------------------------------------- + #-- leave as is if all that's required is launching the Tcl payload" + #-- + #-- Note that sh/bash script isn't called when running a .bat from cmd.exe on windows by default + #-- adjust line 4: @call tclsh ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate + #-- if sh/bash scripting needs to run on windows too. + #-- + #printf "start of bash or sh code" + + # + # + + + #-- sh/bash launches Tcl here instead of shebang line at top + # + #-- use exec to use exitcode (if any) directly from the tcl script + exec /usr/bin/env tclsh "$0" "$@" + # + + #-- alternative - if sh/bash script required to run after the tcl call. + #/usr/bin/env tclsh "$0" "$@" + #tcl_exitcode=$? + #echo "tcl_exitcode: ${tcl_exitcode}" + + # + # + + #-- override exitcode example + #exit 66 + + #printf "No need for trailing slashes for sh/bash code here\n" +#--------------------------------------------------------- +fi +# closing brace for Tcl } +#--------------------------------------------------------- + +#-- tcl and shell script now both active + +#-- comment for line sample 1 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 1 \n" + +#-- comment for line sample 2 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 2 \n" + + +#-- Consistent exitcode from sh,bash,tclsh or cmd +#-- Call exit in tcl (or sh/bash) code only if explicitly required, otherwise leave this commented out. +#-- (script might be more widely useable without explicit exit. e.g in tcl: set ::argc 1; set ::argv "val"; source filename.sh.bat ) +#-- exit line unprotected by trailing slash will work for tcl and/or sh/bash +#exit 0 +#exit 42 + + + +#-- make sure sh/bash/tcl all skip over .bat style exit \ +: <<'shell_end' +#-- .bat exit with exitcode from tcl process \ +:exit +: ;# \ +@exit /B %taskexitcode% +# .bat has exited \ +shell_end + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd new file mode 100644 index 00000000..9de4c125 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd @@ -0,0 +1,717 @@ +: "punk MULTISHELL - shebangless polyglot for Tcl Perl sh bash cmd pwsh powershell" + "[rename set s;proc Hide x {proc $x args {}};Hide :]" + "\$(function : {<#pwsh#>})" + "perlhide" + qw^ +set -- "$@" "a=[Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' +: heredoc1 - hide from powershell using @ and squote above. close sqote for unix shells + ' \ +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl + \ +: "[Hide @GOTO; Hide =begin; Hide @REM] #not necessary but can help avoid errs in testing" + +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: STRONG SUGGESTION: DO NOT MODIFY FIRST LINE OF THIS SCRIPT - except for first double quoted section. +: shebang line is not required on unix or windows and will reduce functionality and/or portability. +: Even comment lines can be part of the functionality of this script (both on unix and windows) - modify with care. +@GOTO :skip_perl_pod_start ^; +=begin excludeperl +: skip_perl_pod_start +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +: { +@REM ############################################################################################################################ +@REM THIS IS A POLYGLOT SCRIPT - supporting payloads in Tcl, bash, (some sh) and/or powershelll (powershell.exe or pwsh.exe) +@REM It should remain portable between unix-like OSes & windows if the proper structure is maintained. +@REM ############################################################################################################################ +@REM On windows, change the value of nextshell to one of the listed 2 digit values if desired, and add code within payload sections for tcl,sh,bash,powershell as appropriate. +@REM This wrapper can be edited manually (carefully!) - or sh,bash,tcl,powershell scripts can be wrapped using the Tcl-based punkshell system +@REM e.g from within a running punkshell: deck scriptwrap.multishell -outputfolder +@REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) +@REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. +@REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. +@REM in batch scripts - array syntax with square brackets is a simulation of arrays or associative arrays. +@REM note that many shells linked as sh do not support substition syntax and may fail - e.g dash etc - generally bash should be used in this context +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshelltypes= powershell______ sh______________ wslbash_________ bash____________ tcl_____________ perl____________" +@REM for batch - only win32 is relevant - but other scripts on other platforms also parse the nextshell block to determine next shell to launch +@REM nextshellpath and nextshelltype indices (underscore-padded to 16wide) are "other" plus those returned by Tcl platform pkg e.g win32,linux,freebsd,macosx +@REM The horrible underscore-padded fixed-widths are to keep the batch labels aligned whilst allowing values to be set +@REM If more than 32 chars needed for a target, it can still be done but overall script padding may need checking/adjusting +@REM Supporting more explicit oses than those listed may also require script padding adjustment +: +@SET "nextshellpath[win32___________]=tclsh___________________________" +@SET "nextshelltype[win32___________]=tcl_____________" +@SET "nextshellpath[dragonflybsd____]=/usr/bin/env tclsh______________" +@SET "nextshelltype[dragonflybsd____]=tcl_____________" +@SET "nextshellpath[freebsd_________]=/usr/bin/env tclsh______________" +@SET "nextshelltype[freebsd_________]=tcl_____________" +@SET "nextshellpath[netbsd__________]=/usr/bin/env tclsh______________" +@SET "nextshelltype[netbsd__________]=tcl_____________" +@SET "nextshellpath[linux___________]=/usr/bin/env tclsh______________" +@SET "nextshelltype[linux___________]=tcl_____________" +@SET "nextshellpath[macosx__________]=/usr/bin/env tclsh______________" +@SET "nextshelltype[macosx__________]=tcl_____________" +@SET "nextshellpath[other___________]=/usr/bin/env tclsh______________" +@SET "nextshelltype[other___________]=tcl_____________" +: +@rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). +: +@SET "asadmin=0" +: +@REM @ECHO nextshelltype is %nextshelltype[win32___________]% +@REM @SET "selected_shelltype=%nextshelltype[win32___________]%" +@SET "selected_shelltype=%nextshelltype[win32___________]%" +@REM @ECHO selected_shelltype %selected_shelltype% +@CALL :stringTrimTrailingUnderscores %selected_shelltype% selected_shelltype_trimmed +@REM @ECHO selected_shelltype_trimmed %selected_shelltype_trimmed% +@SET "selected_shellpath=%nextshellpath[win32___________]%" +@CALL :stringTrimTrailingUnderscores %selected_shellpath% selected_shellpath_trimmed +@CALL SET "keyRemoved=%%validshelltypes:!selected_shelltype!=%%" +@REM @ECHO keyremoved %keyRemoved% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix but should be left in place) +@REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. +@REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 +@REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. +@REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 +@REM ############################################################################################################################ +@REM -- Due to this issue -seemingly trivial edits of the batch file section can break the script! (for Windows anyway) +@REM -- Even something as simple as adding or removing an @REM +@REM -- From within punkshell - use: +@REM -- deck scriptwrap.checkfile +@REM -- to check your templates or final wrapped scripts for byte boundary issues +@REM -- It will report any labels that are on boundaries +@REM -- This is why the nextshell value above is a 2 digit key instead of a string - so that editing the value doesn't change the byte offsets. +@REM -- Editing your sh,bash,tcl,pwsh payloads is much less likely to cause an issue. There is the possibility of the final batch :exit_multishell label spanning a boundary - so testing using deck scriptwrap.checkfile is still recommended. +@REM -- Alternatively, as you should do anyway - test the final script on windows +@REM -- Aside from adding comments/whitespace to tweak the location of labels - you can try duplicating the label (e.g just add the label on a line above) but this is not guaranteed to work in all situations. +@REM -- '@REM' is a safer comment mechanism than a leading colon - which is used sparingly here. +@REM -- A colon anywhere in the script that happens to land on a 512 Byte boundary (from file start or from a callsite) could be misinterpreted as a label +@REM -- It is unknown what versions of cmd interpreters behave this way - and deck scriptwrap.checkfile doesn't check all such boundaries. +@REm -- For this reason, batch labels should be chosen to be relatively unlikely to collide with other strings in the file, and simple names such as :exit or :end should probably be avoided +@REM ############################################################################################################################ +@REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@REM @ECHO commandlineascalled %0 +@REM @ECHO commandlineresolved %~f0 +@CALL :getNormalizedScriptTail nftail +@REM @ECHO normalizedscripttail %nftail% +@CALL :getFileTail %0 clinetail +@REM @ECHO clinetail %clinetail% +@CALL :stringToUpper %~nx0 capscripttail +@REM @ECHO capscriptname: %capscripttail% + +@IF "%nftail%"=="%capscripttail%" ( + @ECHO forcing asadmin=1 due to file name on filesystem being uppercase + @SET "asadmin=1" +) else ( + @CALL :stringToUpper %clinetail% capcmdlinetail + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( + @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase + @set "asadmin=1" + ) +) +@SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" +@SET arglist=%* +@SET "qstrippedargs=args%arglist%" +@SET "qstrippedargs=%qstrippedargs:"=%" +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" ( + GOTO :gotPrivileges +) +@IF !asadmin!==1 ( + net file 1>NUL 2>NUL + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) +) +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@REM padding +@GOTO skip_privileges +:getPrivileges +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) +@ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" +@ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" +@ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" +@ECHO args = args ^& strArg ^& " " >> "%vbsGetPrivileges%" +@ECHO Next >> "%vbsGetPrivileges%" +@ECHO UAC.ShellExecute "%~dp0%~n0%~x0", args, "", "runas", 1 >> "%vbsGetPrivileges%" +@ECHO Launching script in new windows due to administrator elevation +@"%SystemRoot%\System32\WScript.exe" "%vbsGetPrivileges%" %* +@EXIT /B + +:gotPrivileges +@REM setlocal & pushd . +@PUSHD . +@cd /d %~dp0 +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" ( + @DEL "%vbsGetPrivileges%" 1>nul 2>nul + @SET arglist=%arglist:~14% +) + +:skip_privileges +@SET need_ps1=0 +@REM we want the ps1 to exist even if the nextshell isn't powershell +@if not exist "%~dp0%~n0.ps1" ( + @SET need_ps1=1 +) ELSE ( + fc "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >nul || goto different + @REM @ECHO "files same" + @SET need_ps1=0 +) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 +:pscontinue +@IF !need_ps1!==1 ( + COPY "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >NUL +) +@REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? +@IF "%selected_shelltype_trimmed%"=="powershell" ( + REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time + REM test availability of preferred option of powershell7+ pwsh + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + REM fallback to powershell if pwsh failed + IF !pwshtest_exitcode!==0 ( + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; "%~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM CALL powershell -nop -nol -c write-host powershell-found + REM powershell -nop -nol -file "%~dp0%~n0.ps1" %* + powershell -nop -nol -c set-executionpolicy -Scope Process Unrestricted; %~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF "%selected_shelltype_trimmed%"=="wslbash" ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + %selected_shellpath_trimmed% "!wslpath!%fname%" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM perl or tcl or sh or bash + IF NOT "x%keyRemoved%"=="x%validshelltypes%" ( + REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + REM The compound statement with trailing call is required to stop batch termination confirmation, whilst still capturing exitcode + %selected_shellpath_trimmed% "%~dp0%fname%" %arglist% & SET task_exitcode=!errorlevel! & Call; + ) ELSE ( + ECHO %fname% has invalid nextshelltype value %selected_shelltype% valid options are %validshelltypes% + SET task_exitcode=66 + @REM boundary padding + @REM boundary padding + GOTO :exit_multishell + ) + ) +) +@REM batch file library functions +@REM boundary padding +@GOTO :endlib + +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@EXIT /B + +:getFileTail +@REM return tail of file without any normalization e.g c:/punkshell/bin/Punk.cmd returns Punk.cmd even if file is punk.cmd +@REM we can't use things such as %~nx1 as it can change capitalisation +@REM This function is designed explicitly to preserve capitalisation +@REM accepts full paths with either / or \ as delimiters - or +@SETLOCAL + @SET "rtrn=%~2" + @SET "arg=%~1" + @REM @SET "result=%_arg:*/=%" + @REM @SET "result=%~1" + @SET LF=^ + + + : The above 2 empty lines are important. Don't remove + @CALL :stringContains "!arg!" "\" hasBackSlash + @IF "!hasBackslash!"=="true" ( + @for %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:\=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @CALL :stringContains "!arg!" "/" hasForwardSlash + @IF "!hasForwardSlash!"=="true" ( + @FOR %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:/=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @set "result=%arg%" + ) + ) +@ENDLOCAL & ( + @if "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B +@REM boundary padding +@REM boundary padding +:getNormalizedScriptTail +@SETLOCAL + @SET "result=%~nx0" + @SET "rtrn=%~1" +@ENDLOCAL & ( + @IF "%~1" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:getNormalizedFileTailFromPath +@REM warn via echo, and do not set return variable if path not found +@REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding +@REM boundary padding +@REM boundary padding +@SETLOCAL + @CALL :stringContains %~1 "\" hasBackSlash + @CALL :stringContains %~1 "/" hasForwardSlash + @IF "%hasBackslash%-%hasForwardslash%"=="false-false" ( + @SET "P=%cd%%~1" + @CALL :getNormalizedFileTailFromPath "!P!" ftail2 + @SET "result=!ftail2!" + ) else ( + @IF EXIST "%~1" ( + @SET "result=%~nx1" + ) else ( + @ECHO error getNormalizedFileTailFromPath file not found: %~1 + @EXIT /B 1 + ) + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + @ECHO getNormalizedFileTailFromPath %1 result: %result% + ) +) +@EXIT /B + +:stringContains +@REM usage: @CALL:stringContains string needle returnvarname +@SETLOCAL + @SET "rtrn=%~3" + @SET "string=%~1" + @SET "needle=%~2" + @IF "!string:%needle%=!"=="!string!" @( + @SET "result=false" + ) ELSE ( + @SET "result=true" + ) +@ENDLOCAL & ( + @IF "%~3" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringContains %string% %needle% result: %result% + ) +) +@EXIT /B + +:stringToUpper +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "capstring=%~1" + @FOR %%A in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) DO @( + @SET "capstring=!capstring:%%A=%%A!" + ) + @SET "result=!capstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringToUpper %string% result: %result% + ) +) +@EXIT /B +:stringTrimTrailingUnderscores +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "trimstring=%~1" + @REM trim up to 31 underscores from the end of a string using string substitution + @SET trimstring=%trimstring%### + @SET trimstring=%trimstring:________________###=###% + @SET trimstring=%trimstring:________###=###% + @SET trimstring=%trimstring:____###=###% + @SET trimstring=%trimstring:__###=###% + @SET trimstring=%trimstring:_###=###% + @SET trimstring=%trimstring:###=% + @SET "result=!trimstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringTrimTrailingUnderscores %string% result: %result% + ) +) +@EXIT /B +:isNumeric +@SETLOCAL + @SET "notnumeric="&FOR /F "delims=0123456789" %%i in ("%1") do set "notnumeric=%%i" + @IF defined notnumeric ( + @SET "result=false" + ) else ( + @SET "result=true" + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:endlib +: \ +@REM padding +@REM @SET taskexit_code=!errorlevel! & goto :exit_multishell +@GOTO :exit_multishell +# } +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore +Hide :exit_multishell;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + +# +# + + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : # +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust the %nextshell% value above +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +exitcode=0 +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "sh/bash reporting tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- Perl script section +# -- leave the script below as is, if all that is required is launching the Tcl payload" +# -- +# -- Note that perl script isn't called by default when simply running this script by name +# -- adjust the nextshell value at the top of the script to point to perl +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +=cut +#!/user/bin/perl +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin perl Payload +my $exit_code = 0; +#use ExtUtils::Installed; +#my $installed = ExtUtils::Installed->new(); +#my @modules = $installed->modules(); +#print "Modules:\n"; +#foreach my $m (@modules) { +# print "$m\n"; +#} +# -- --- --- + + + +my $scriptname = $0; +print "perl $scriptname\n"; +my $i =1; +foreach my $a(@ARGV) { + print "Arg # $i: $a\n"; +} + +# +# + + + +# -- --- --- --- --- --- --- --- +# +$exit_code=system("tclsh", $scriptname, @ARGV); +#print "perl reporting tcl exitcode: $exit_code"; +# +# -- --- --- --- --- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end perl Payload +exit $exit_code; +__END__ + +# end hide sh/bash/perl block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- Do not edit if current file is the .ps1 +# -- Edit the corresponding .cmd and it will autocopy +# -- unbalanced braces { } here *even in comments* will cause problems if there was no Tcl exit or return above +# -- custom script should generally go below the begin_powershell_payload line +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = GetScriptName +function GetDynamicParamDictionary { + [CmdletBinding()] + param( + [Parameter(ValueFromPipeline=$true, Mandatory=$true)] + [string] $CommandName + ) + + begin { + # Get a list of params that should be ignored (they're common to all advanced functions) + $CommonParameterNames = [System.Runtime.Serialization.FormatterServices]::GetUninitializedObject([type] [System.Management.Automation.Internal.CommonParameters]) | + Get-Member -MemberType Properties | + Select-Object -ExpandProperty Name + } + + process { + # Create the dictionary that this scriptblock will return: + $DynParamDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary + + # Convert to object array and get rid of Common params: + (Get-Command $CommandName | select -exp Parameters).GetEnumerator() | + Where-Object { $CommonParameterNames -notcontains $_.Key } | + ForEach-Object { + $DynamicParameter = New-Object System.Management.Automation.RuntimeDefinedParameter ( + $_.Key, + $_.Value.ParameterType, + $_.Value.Attributes + ) + $DynParamDictionary.Add($_.Key, $DynamicParameter) + } + + # Return the dynamic parameters + return $DynParamDictionary + } +} +# GetDynamicParamDictionary +# - This can make it easier to share a single set of param definitions between functions +# - sample usage +#function ParameterDefinitions { +# param( +# [Parameter(Mandatory)][string] $myargument +# ) +#} +#function psmain { +# [CmdletBinding()] +# param() +# dynamicparam { GetDynamicParamDictionary ParameterDefinitions } +# process { +# #called once with $PSBoundParameters dictionary +# #can be used to validate arguments, or set a simpler variable name for access +# switch ($PSBoundParameters.keys) { +# 'myargumentname' { +# Set-Variable -Name $_ -Value $PSBoundParameters."$_" +# } +# #... +# } +# foreach ($boundparam in $PSBoundParameters.GetEnumerator()) { +# #... +# } +# } +# end { +# #Main function logic +# Write-Host "myargumentname value is: $myargumentname" +# #myotherfunction @PSBoundParameters +# } +#} +#psmain @args +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +# +# -- --- --- --- --- --- --- --- + + +# +# + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: comment end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell +:exit_multishell +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: comment end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +no script engine should try to run me +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell1.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell1.cmd new file mode 100644 index 00000000..17fe4c15 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell1.cmd @@ -0,0 +1,524 @@ +: "[rename set s;proc Hide x {proc $x args {}};Hide :]" "\$(function : {<#pwsh#>})" ^ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide x {proc $x args {}}; Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' +: heredoc1 - hide from powershell using @ and squote above. (close sqote for unix shells) ' \ +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo; Hide @REM]#not necessary but can help avoid errs in testing" +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +: { +: STRONG SUGGESTION: DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required on unix or windows and will reduce functionality and/or portability. +: Even comment lines can be part of the functionality of this script (both on unix and windows) - modify with care. +@REM ############################################################################################################################ +@REM THIS IS A POLYGLOT SCRIPT - supporting payloads in Tcl, bash, sh and/or powershelll (powershell.exe or pwsh.exe) +@REM It should remain portable between unix-like OSes & windows if the proper structure is maintained. +@REM ############################################################################################################################ +@REM On windows, change the value of nextshell to one of the listed 2 digit values if desired, and add code within payload sections for tcl,sh,bash,powershell as appropriate. +@REM This wrapper can be edited manually (carefully!) - or sh,bash,tcl,powershell scripts can be wrapped using the Tcl-based punkshell system +@REM e.g from within a running punkshell: pmix scriptwrap.multishell -outputfolder +@REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) +@REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. +@REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshells= ^(10^) 'pwsh' ^(11^) 'sh' (^12^) 'bash' (^13^) 'tclsh'" +@SET "shells[10]=pwsh" +@SET "shells[11]=sh" +@set "shells[12]=bash" +@SET "shells[13]=tclsh" +: +@SET "nextshell=13" +: +@rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). +: +@SET "asadmin=0" +: +@REM nextshell set to index for validshells .eg 10 for pwsh +@REM @ECHO nextshell is %nextshell% +@SET "selected=!shells[%nextshell%]!" +@REM @ECHO selected %selected% +@CALL SET "keyRemoved=%%validshells:'!selected!'=%%" +@REM @ECHO keyremoved %keyRemoved% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix but should be left in place) +@REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. +@REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 +@REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. +@REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 +@REM ############################################################################################################################ +@REM -- Due to this issue -seemingly trivial edits of the batch file section can break the script! (for Windows anyway) +@REM -- Even something as simple as adding or removing an @REM +@REM -- From within punkshell - use: +@REM -- pmix scriptwrap.checkfile +@REM -- to check your templates or final wrapped scripts for byte boundary issues +@REM -- It will report any labels that are on boundaries +@REM -- This is why the nextshell value above is a 2 digit key instead of a string - so that editing the value doesn't change the byte offsets. +@REM -- Editing your sh,bash,tcl,pwsh payloads is much less likely to cause an issue. There is the possibility of the final batch :exit_multishell label spanning a boundary - so testing using pmix scriptwrap.checkfile is still recommended. +@REM -- Alternatively, as you should do anyway - test the final script on windows +@REM -- Aside from adding comments/whitespace to tweak the location of labels - you can try duplicating the label (e.g just add the label on a line above) but this is not guaranteed to work in all situations. +@REM -- '@REM' is a safer comment mechanism than a leading colon - which is used sparingly here. +@REM -- A colon anywhere in the script that happens to land on a 512 Byte boundary (from file start or from a callsite) could be misinterpreted as a label +@REM -- It is unknown what versions of cmd interpreters behave this way - and pmix scriptwrap.checkfile doesn't check all such boundaries. +@REm -- For this reason, batch labels should be chosen to be relatively unlikely to collide with other strings in the file, and simple names such as :exit or :end should probably be avoided +@REM ############################################################################################################################ +@REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@REM @ECHO commandlineascalled %0 +@REM @ECHO commandlineresolved %~f0 +@CALL :getNormalizedScriptTail nftail +@REM @ECHO normalizedscripttail %nftail% +@CALL :getFileTail %0 clinetail +@REM @ECHO clinetail %clinetail% +@CALL :stringToUpper %~nx0 capscripttail +@REM @ECHO capscriptname: %capscripttail% + +@IF "%nftail%"=="%capscripttail%" ( + @ECHO forcing asadmin=1 due to file name on filesystem being uppercase + @SET "asadmin=1" +) else ( + @CALL :stringToUpper %clinetail% capcmdlinetail + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( + @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase + @set "asadmin=1" + ) +) +@SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" +@SET arglist=%* +@IF "%1"=="PUNK-ELEVATED" ( + GOTO :gotPrivileges +) +@IF !asadmin!==1 ( + net file 1>NUL 2>NUL + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) +) +@GOTO skip_privileges +:getPrivileges +@IF '%1'=='PUNK-ELEVATED' (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) +@ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" +@ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" +@ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" +@ECHO args = args ^& strArg ^& " " >> "%vbsGetPrivileges%" +@ECHO Next >> "%vbsGetPrivileges%" +@ECHO UAC.ShellExecute "%~dp0%~n0.cmd", args, "", "runas", 1 >> "%vbsGetPrivileges%" +@ECHO Launching script in new windows due to administrator elevation +@"%SystemRoot%\System32\WScript.exe" "%vbsGetPrivileges%" %* +@EXIT /B + +:gotPrivileges +@REM setlocal & pushd . +@PUSHD . +@cd /d %~dp0 +@IF "%1"=="PUNK-ELEVATED" ( + @DEL "%vbsGetPrivileges%" 1>nul 2>nul + @SET arglist=%arglist:~14% +) + +:skip_privileges +@SET need_ps1=0 +@REM we want the ps1 to exist even if the nextshell isn't powershell +@if not exist "%~dp0%~n0.ps1" ( + @SET need_ps1=1 +) ELSE ( + fc "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >nul || goto different + @REM @ECHO "files same" + @SET need_ps1=0 +) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 +:pscontinue +@IF !need_ps1!==1 ( + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL +) +@REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? +@IF "!shells[%nextshell%]!"=="pwsh" ( + REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time + REM test availability of preferred option of powershell7+ pwsh + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + REM fallback to powershell if pwsh failed + IF !pwshtest_exitcode!==0 ( + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; "%~dp0%~n0.ps1" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + REM CALL powershell -nop -nol -c write-host powershell-found + REM powershell -nop -nol -file "%~dp0%~n0.ps1" %* + powershell -nop -nol -c set-executionpolicy -Scope Process Unrestricted; %~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF "!shells[%nextshell%]!"=="bash" ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + !shells[%nextshell%]! "!wslpath!%fname%" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + !shells[%nextshell%]! "%~dp0%fname%" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value ^(%nextshell%^) !shells[%nextshell%]! valid options are %validshells% + SET task_exitcode=66 + GOTO :exit_multishell + ) + ) +) +@REM batch file library functions +@GOTO :endlib + +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@EXIT /B + +:getFileTail +@REM return tail of file without any normalization e.g c:/punkshell/bin/Punk.cmd returns Punk.cmd even if file is punk.cmd +@REM we can't use things such as %~nx1 as it can change capitalisation +@REM This function is designed explicitly to preserve capitalisation +@REM accepts full paths with either / or \ as delimiters - or +@SETLOCAL + @SET "rtrn=%~2" + @SET "arg=%~1" + @REM @SET "result=%_arg:*/=%" + @REM @SET "result=%~1" + @SET LF=^ + + + : The above 2 empty lines are important. Don't remove + @CALL :stringContains "!arg!" "\" hasBackSlash + @IF "!hasBackslash!"=="true" ( + @for %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:\=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @CALL :stringContains "!arg!" "/" hasForwardSlash + @IF "!hasForwardSlash!"=="true" ( + @FOR %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:/=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @set "result=%arg%" + ) + ) +@ENDLOCAL & ( + @if "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B +@REM boundary padding +:getNormalizedScriptTail +@SETLOCAL + @SET "result=%~nx0" + @SET "rtrn=%~1" +@ENDLOCAL & ( + @IF "%~1" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:getNormalizedFileTailFromPath +@REM warn via echo, and do not set return variable if path not found +@REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding +@SETLOCAL + @CALL :stringContains %~1 "\" hasBackSlash + @CALL :stringContains %~1 "/" hasForwardSlash + @IF "%hasBackslash%-%hasForwardslash%"=="false-false" ( + @SET "P=%cd%%~1" + @CALL :getNormalizedFileTailFromPath "!P!" ftail2 + @SET "result=!ftail2!" + ) else ( + @IF EXIST "%~1" ( + @SET "result=%~nx1" + ) else ( + @ECHO error getNormalizedFileTailFromPath file not found: %~1 + @EXIT /B 1 + ) + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + @ECHO getNormalizedFileTailFromPath %1 result: %result% + ) +) +@EXIT /B + +:stringContains +@REM usage: @CALL:stringContains string needle returnvarname +@SETLOCAL + @SET "rtrn=%~3" + @SET "string=%~1" + @SET "needle=%~2" + @IF "!string:%needle%=!"=="!string!" @( + @SET "result=false" + ) ELSE ( + @SET "result=true" + ) +@ENDLOCAL & ( + @IF "%~3" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringContains %string% %needle% result: %result% + ) +) +@EXIT /B + +:stringToUpper +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "capstring=%~1" + @FOR %%A in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) DO @( + @SET "capstring=!capstring:%%A=%%A!" + ) + @SET "result=!capstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringToUpper %string% result: %result% + ) +) +@EXIT /B + +:isNumeric +@SETLOCAL + @SET "notnumeric="&FOR /F "delims=0123456789" %%i in ("%1") do set "notnumeric=%%i" + @IF defined notnumeric ( + @SET "result=false" + ) else ( + @SET "result=true" + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:endlib +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit_multishell +@GOTO :exit_multishell +# } +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore +Hide :exit_multishell;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : # +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- Do not edit if current file is the .ps1 +# -- Edit the corresponding .cmd and it will autocopy +# -- unbalanced braces { } here *even in comments* will cause problems if there was no Tcl exit or return above +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +# +# -- --- --- --- --- --- --- --- + + +# +# + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: comment end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell +:exit_multishell +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@IF "%1"=="PUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: comment end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell2.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell2.cmd new file mode 100644 index 00000000..a9688b6a --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/multishell2.cmd @@ -0,0 +1,680 @@ +: "punk MULTISHELL - shebangless polyglot for Tcl Perl sh bash cmd pwsh powershell" + "[rename set s;proc Hide x {proc $x args {}};Hide :]" + "\$(function : {<#pwsh#>})" + "perlhide" + qw^ +set -- "$@" "a=[Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' +: heredoc1 - hide from powershell using @ and squote above. close sqote for unix shells + ' \ +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl + \ +: "[Hide @GOTO; Hide =begin; Hide @REM] #not necessary but can help avoid errs in testing" + +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: STRONG SUGGESTION: DO NOT MODIFY FIRST LINE OF THIS SCRIPT - except for first double quoted section. +: shebang line is not required on unix or windows and will reduce functionality and/or portability. +: Even comment lines can be part of the functionality of this script (both on unix and windows) - modify with care. +@GOTO :skip_perl_pod_start ^; +=begin excludeperl +: skip_perl_pod_start +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +: { +@REM ############################################################################################################################ +@REM THIS IS A POLYGLOT SCRIPT - supporting payloads in Tcl, bash, sh and/or powershelll (powershell.exe or pwsh.exe) +@REM It should remain portable between unix-like OSes & windows if the proper structure is maintained. +@REM ############################################################################################################################ +@REM On windows, change the value of nextshell to one of the listed 2 digit values if desired, and add code within payload sections for tcl,sh,bash,powershell as appropriate. +@REM This wrapper can be edited manually (carefully!) - or sh,bash,tcl,powershell scripts can be wrapped using the Tcl-based punkshell system +@REM e.g from within a running punkshell: deck scriptwrap.multishell -outputfolder +@REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) +@REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. +@REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshells= ^(10^) 'pwsh' ^(11^) 'sh' (^12^) 'bash' (^13^) 'tclsh' (^14^) 'perl'" +@SET "shells[10]=pwsh" +@SET "shells[11]=sh" +@set "shells[12]=bash" +@SET "shells[13]=tclsh" +@SET "shells[14]=perl" +: +@SET "nextshell=13" +: +@rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). +: +@SET "asadmin=0" +: +@REM nextshell set to index for validshells .eg 10 for pwsh +@REM @ECHO nextshell is %nextshell% +@SET "selected=!shells[%nextshell%]!" +@REM @ECHO selected %selected% +@CALL SET "keyRemoved=%%validshells:'!selected!'=%%" +@REM @ECHO keyremoved %keyRemoved% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix but should be left in place) +@REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. +@REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 +@REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. +@REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 +@REM ############################################################################################################################ +@REM -- Due to this issue -seemingly trivial edits of the batch file section can break the script! (for Windows anyway) +@REM -- Even something as simple as adding or removing an @REM +@REM -- From within punkshell - use: +@REM -- deck scriptwrap.checkfile +@REM -- to check your templates or final wrapped scripts for byte boundary issues +@REM -- It will report any labels that are on boundaries +@REM -- This is why the nextshell value above is a 2 digit key instead of a string - so that editing the value doesn't change the byte offsets. +@REM -- Editing your sh,bash,tcl,pwsh payloads is much less likely to cause an issue. There is the possibility of the final batch :exit_multishell label spanning a boundary - so testing using deck scriptwrap.checkfile is still recommended. +@REM -- Alternatively, as you should do anyway - test the final script on windows +@REM -- Aside from adding comments/whitespace to tweak the location of labels - you can try duplicating the label (e.g just add the label on a line above) but this is not guaranteed to work in all situations. +@REM -- '@REM' is a safer comment mechanism than a leading colon - which is used sparingly here. +@REM -- A colon anywhere in the script that happens to land on a 512 Byte boundary (from file start or from a callsite) could be misinterpreted as a label +@REM -- It is unknown what versions of cmd interpreters behave this way - and deck scriptwrap.checkfile doesn't check all such boundaries. +@REm -- For this reason, batch labels should be chosen to be relatively unlikely to collide with other strings in the file, and simple names such as :exit or :end should probably be avoided +@REM ############################################################################################################################ +@REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@REM @ECHO commandlineascalled %0 +@REM @ECHO commandlineresolved %~f0 +@CALL :getNormalizedScriptTail nftail +@REM @ECHO normalizedscripttail %nftail% +@CALL :getFileTail %0 clinetail +@REM @ECHO clinetail %clinetail% +@CALL :stringToUpper %~nx0 capscripttail +@REM @ECHO capscriptname: %capscripttail% + +@IF "%nftail%"=="%capscripttail%" ( + @ECHO forcing asadmin=1 due to file name on filesystem being uppercase + @SET "asadmin=1" +) else ( + @CALL :stringToUpper %clinetail% capcmdlinetail + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( + @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase + @set "asadmin=1" + ) +) +@SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" +@SET arglist=%* +@SET "qstrippedargs=args%arglist%" +@SET "qstrippedargs=%qstrippedargs:"=%" +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" ( + GOTO :gotPrivileges +) +@IF !asadmin!==1 ( + net file 1>NUL 2>NUL + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) +) +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@REM +@GOTO skip_privileges +:getPrivileges +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) +@ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" +@ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" +@ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" +@ECHO args = args ^& strArg ^& " " >> "%vbsGetPrivileges%" +@ECHO Next >> "%vbsGetPrivileges%" +@ECHO UAC.ShellExecute "%~dp0%~n0%~x0", args, "", "runas", 1 >> "%vbsGetPrivileges%" +@ECHO Launching script in new windows due to administrator elevation +@"%SystemRoot%\System32\WScript.exe" "%vbsGetPrivileges%" %* +@EXIT /B + +:gotPrivileges +@REM setlocal & pushd . +@PUSHD . +@cd /d %~dp0 +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" ( + @DEL "%vbsGetPrivileges%" 1>nul 2>nul + @SET arglist=%arglist:~14% +) + +:skip_privileges +@SET need_ps1=0 +@REM we want the ps1 to exist even if the nextshell isn't powershell +@if not exist "%~dp0%~n0.ps1" ( + @SET need_ps1=1 +) ELSE ( + fc "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >nul || goto different + @REM @ECHO "files same" + @SET need_ps1=0 +) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 +:pscontinue +@IF !need_ps1!==1 ( + COPY "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >NUL +) +@REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? +@IF "!shells[%nextshell%]!"=="pwsh" ( + REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time + REM test availability of preferred option of powershell7+ pwsh + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + REM fallback to powershell if pwsh failed + IF !pwshtest_exitcode!==0 ( + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; "%~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM CALL powershell -nop -nol -c write-host powershell-found + REM powershell -nop -nol -file "%~dp0%~n0.ps1" %* + powershell -nop -nol -c set-executionpolicy -Scope Process Unrestricted; %~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF "!shells[%nextshell%]!"=="bash" ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + !shells[%nextshell%]! "!wslpath!%fname%" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + !shells[%nextshell%]! "%~dp0%fname%" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value ^(%nextshell%^) !shells[%nextshell%]! valid options are %validshells% + SET task_exitcode=66 + @REM boundary padding + @REM boundary padding + GOTO :exit_multishell + ) + ) +) +@REM batch file library functions +@REM boundary padding +@GOTO :endlib + +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@EXIT /B + +:getFileTail +@REM return tail of file without any normalization e.g c:/punkshell/bin/Punk.cmd returns Punk.cmd even if file is punk.cmd +@REM we can't use things such as %~nx1 as it can change capitalisation +@REM This function is designed explicitly to preserve capitalisation +@REM accepts full paths with either / or \ as delimiters - or +@SETLOCAL + @SET "rtrn=%~2" + @SET "arg=%~1" + @REM @SET "result=%_arg:*/=%" + @REM @SET "result=%~1" + @SET LF=^ + + + : The above 2 empty lines are important. Don't remove + @CALL :stringContains "!arg!" "\" hasBackSlash + @IF "!hasBackslash!"=="true" ( + @for %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:\=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @CALL :stringContains "!arg!" "/" hasForwardSlash + @IF "!hasForwardSlash!"=="true" ( + @FOR %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:/=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @set "result=%arg%" + ) + ) +@ENDLOCAL & ( + @if "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B +@REM boundary padding +@REM boundary padding +:getNormalizedScriptTail +@SETLOCAL + @SET "result=%~nx0" + @SET "rtrn=%~1" +@ENDLOCAL & ( + @IF "%~1" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:getNormalizedFileTailFromPath +@REM warn via echo, and do not set return variable if path not found +@REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding +@REM boundary padding +@REM boundary padding +@SETLOCAL + @CALL :stringContains %~1 "\" hasBackSlash + @CALL :stringContains %~1 "/" hasForwardSlash + @IF "%hasBackslash%-%hasForwardslash%"=="false-false" ( + @SET "P=%cd%%~1" + @CALL :getNormalizedFileTailFromPath "!P!" ftail2 + @SET "result=!ftail2!" + ) else ( + @IF EXIST "%~1" ( + @SET "result=%~nx1" + ) else ( + @ECHO error getNormalizedFileTailFromPath file not found: %~1 + @EXIT /B 1 + ) + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + @ECHO getNormalizedFileTailFromPath %1 result: %result% + ) +) +@EXIT /B + +:stringContains +@REM usage: @CALL:stringContains string needle returnvarname +@SETLOCAL + @SET "rtrn=%~3" + @SET "string=%~1" + @SET "needle=%~2" + @IF "!string:%needle%=!"=="!string!" @( + @SET "result=false" + ) ELSE ( + @SET "result=true" + ) +@ENDLOCAL & ( + @IF "%~3" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringContains %string% %needle% result: %result% + ) +) +@EXIT /B + +:stringToUpper +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "capstring=%~1" + @FOR %%A in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) DO @( + @SET "capstring=!capstring:%%A=%%A!" + ) + @SET "result=!capstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringToUpper %string% result: %result% + ) +) +@EXIT /B + +:isNumeric +@SETLOCAL + @SET "notnumeric="&FOR /F "delims=0123456789" %%i in ("%1") do set "notnumeric=%%i" + @IF defined notnumeric ( + @SET "result=false" + ) else ( + @SET "result=true" + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:endlib +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit_multishell +@GOTO :exit_multishell +# } +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore +Hide :exit_multishell;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + +# +# + + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : # +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust the %nextshell% value above +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +exitcode=0 +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "sh/bash reporting tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- Perl script section +# -- leave the script below as is, if all that is required is launching the Tcl payload" +# -- +# -- Note that perl script isn't called by default when simply running this script by name +# -- adjust the nextshell value at the top of the script to point to perl +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +=cut +#!/user/bin/perl +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin perl Payload +my $exit_code = 0; +#use ExtUtils::Installed; +#my $installed = ExtUtils::Installed->new(); +#my @modules = $installed->modules(); +#print "Modules:\n"; +#foreach my $m (@modules) { +# print "$m\n"; +#} +# -- --- --- + + + +my $scriptname = $0; +print "perl $scriptname\n"; +my $i =1; +foreach my $a(@ARGV) { + print "Arg # $i: $a\n"; +} + +# +# + + + +# -- --- --- --- --- --- --- --- +# +$exit_code=system("tclsh", $scriptname, @ARGV); +#print "perl reporting tcl exitcode: $exit_code"; +# +# -- --- --- --- --- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end perl Payload +exit $exit_code; +__END__ + +# end hide sh/bash/perl block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- Do not edit if current file is the .ps1 +# -- Edit the corresponding .cmd and it will autocopy +# -- unbalanced braces { } here *even in comments* will cause problems if there was no Tcl exit or return above +# -- custom script should generally go below the begin_powershell_payload line +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = GetScriptName +function GetDynamicParamDictionary { + [CmdletBinding()] + param( + [Parameter(ValueFromPipeline=$true, Mandatory=$true)] + [string] $CommandName + ) + + begin { + # Get a list of params that should be ignored (they're common to all advanced functions) + $CommonParameterNames = [System.Runtime.Serialization.FormatterServices]::GetUninitializedObject([type] [System.Management.Automation.Internal.CommonParameters]) | + Get-Member -MemberType Properties | + Select-Object -ExpandProperty Name + } + + process { + # Create the dictionary that this scriptblock will return: + $DynParamDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary + + # Convert to object array and get rid of Common params: + (Get-Command $CommandName | select -exp Parameters).GetEnumerator() | + Where-Object { $CommonParameterNames -notcontains $_.Key } | + ForEach-Object { + $DynamicParameter = New-Object System.Management.Automation.RuntimeDefinedParameter ( + $_.Key, + $_.Value.ParameterType, + $_.Value.Attributes + ) + $DynParamDictionary.Add($_.Key, $DynamicParameter) + } + + # Return the dynamic parameters + return $DynParamDictionary + } +} +# GetDynamicParamDictionary +# - This can make it easier to share a single set of param definitions between functions +# - sample usage +#function ParameterDefinitions { +# param( +# [Parameter(Mandatory)][string] $myargument +# ) +#} +#function psmain { +# [CmdletBinding()] +# param() +# dynamicparam { GetDynamicParamDictionary ParameterDefinitions } +# process { +# #called once with $PSBoundParameters dictionary +# #can be used to validate arguments, or set a simpler variable name for access +# switch ($PSBoundParameters.keys) { +# 'myargumentname' { +# Set-Variable -Name $_ -Value $PSBoundParameters."$_" +# } +# #... +# } +# foreach ($boundparam in $PSBoundParameters.GetEnumerator()) { +# #... +# } +# } +# end { +# #Main function logic +# Write-Host "myargumentname value is: $myargumentname" +# #myotherfunction @PSBoundParameters +# } +#} +#psmain @args +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +# +# -- --- --- --- --- --- --- --- + + +# +# + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: comment end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell +:exit_multishell +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@IF "is%qstrippedargs:~4,13%"=="isPUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: comment end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +no script engine should try to run me +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell-old.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell-old.cmd new file mode 100644 index 00000000..1cb9e0ef --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell-old.cmd @@ -0,0 +1,270 @@ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide s {proc $s args {}}; Hide :;rename set s2;Hide set;s2 1 list]"; set -- : "$@"; $1 = @' +: heredoc1 - hide from powershell (close sqote for unix shells) ' \ +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo]#not necessary but can help avoid errs in testing" +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +@REM { +@REM DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required and will reduce functionality. +@REM Even comment lines can be part of the functionality of this script - modify with care. +@REM Change the value of nextshell in the next line if desired, and code within payload sections as appropriate. +@SET "nextshell=pwsh" +@REM nextshell set to pwsh,sh,bash or tclsh +@REM @ECHO nextshell is %nextshell% +@SET "validshells=pwsh,sh,bash,tclsh" +@CALL SET keyRemoved=%%validshells:%nextshell%=%% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix) +@REM -- This section intended only to launch the next shell +@REM -- Avoid customising this if possible. cmd/batch script is probably the least expressive language. +@REM -- custom windows payloads should be in powershell,tclsh or sh/bash code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@IF %nextshell%==pwsh ( + CALL pwsh -nop -c set-executionpolicy -Scope CurrentUser RemoteSigned + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL + REM test availability of preferred option of powershell7+ pwsh + CALL pwsh -nop -nol -c write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + IF !pwshtest_exitcode!==0 CALL pwsh -nop -nol "%~dp0%~n0.ps1" %* & SET task_exitcode=!errorlevel! + REM fallback to powershell if pwsh failed + IF NOT !pwshtest_exitcode!==0 ( + REM CALL powershell -nop -nol -c write-host powershell-found + CALL powershell -nop -nol -file "%~dp0%~n0.ps1" %* + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF %nextshell%==bash ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + CALL %nextshell% "!wslpath!%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + CALL %nextshell% "%~dp0%fname%" %* & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value %nextshell% valid options are %validshells% + SET task_exitcode=66 + GOTO :exit + ) + ) +) +@GOTO :endlib +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@GOTO :eof +:endlib + +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit +@GOTO :exit +# } +# rem call %nextshell% "%~dp0%~n0.cmd" %* +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s2 set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup +Hide :exit;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +# +# -- --- --- --- --- --- --- --- + + +# +# + +# unbal } + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: cmd exit label - return exitcode +:exit +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd new file mode 100644 index 00000000..0e6b9ff9 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd @@ -0,0 +1,661 @@ +: "punk MULTISHELL - shebangless polyglot for Tcl Perl sh bash cmd pwsh powershell" + "[rename set s;proc Hide x {proc $x args {}};Hide :]" + "\$(function : {<#pwsh#>})" + "perlhide" + qw^ +set -- "$@" "a=[Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' +: heredoc1 - hide from powershell using @ and squote above. close sqote for unix shells + ' \ +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl + \ +: "[Hide @GOTO; Hide =begin; Hide @REM] #not necessary but can help avoid errs in testing" + +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: STRONG SUGGESTION: DO NOT MODIFY FIRST LINE OF THIS SCRIPT - except for first double quoted section. +: shebang line is not required on unix or windows and will reduce functionality and/or portability. +: Even comment lines can be part of the functionality of this script (both on unix and windows) - modify with care. +@GOTO :skip_perl_pod_start ^; +=begin excludeperl +: skip_perl_pod_start +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +: { +@REM ############################################################################################################################ +@REM THIS IS A POLYGLOT SCRIPT - supporting payloads in Tcl, bash, sh and/or powershelll (powershell.exe or pwsh.exe) +@REM It should remain portable between unix-like OSes & windows if the proper structure is maintained. +@REM ############################################################################################################################ +@REM On windows, change the value of nextshell to one of the listed 2 digit values if desired, and add code within payload sections for tcl,sh,bash,powershell as appropriate. +@REM This wrapper can be edited manually (carefully!) - or sh,bash,tcl,powershell scripts can be wrapped using the Tcl-based punkshell system +@REM e.g from within a running punkshell: pmix scriptwrap.multishell -outputfolder +@REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) +@REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. +@REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshells= ^(10^) 'pwsh' ^(11^) 'sh' (^12^) 'bash' (^13^) 'tclsh' (^14^) 'perl'" +@SET "shells[10]=pwsh" +@SET "shells[11]=sh" +@set "shells[12]=bash" +@SET "shells[13]=tclsh" +@SET "shells[14]=perl" +: +@SET "nextshell=13" +: +@rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). +: +@SET "asadmin=0" +: +@REM nextshell set to index for validshells .eg 10 for pwsh +@REM @ECHO nextshell is %nextshell% +@SET "selected=!shells[%nextshell%]!" +@REM @ECHO selected %selected% +@CALL SET "keyRemoved=%%validshells:'!selected!'=%%" +@REM @ECHO keyremoved %keyRemoved% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix but should be left in place) +@REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. +@REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 +@REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. +@REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 +@REM ############################################################################################################################ +@REM -- Due to this issue -seemingly trivial edits of the batch file section can break the script! (for Windows anyway) +@REM -- Even something as simple as adding or removing an @REM +@REM -- From within punkshell - use: +@REM -- pmix scriptwrap.checkfile +@REM -- to check your templates or final wrapped scripts for byte boundary issues +@REM -- It will report any labels that are on boundaries +@REM -- This is why the nextshell value above is a 2 digit key instead of a string - so that editing the value doesn't change the byte offsets. +@REM -- Editing your sh,bash,tcl,pwsh payloads is much less likely to cause an issue. There is the possibility of the final batch :exit_multishell label spanning a boundary - so testing using pmix scriptwrap.checkfile is still recommended. +@REM -- Alternatively, as you should do anyway - test the final script on windows +@REM -- Aside from adding comments/whitespace to tweak the location of labels - you can try duplicating the label (e.g just add the label on a line above) but this is not guaranteed to work in all situations. +@REM -- '@REM' is a safer comment mechanism than a leading colon - which is used sparingly here. +@REM -- A colon anywhere in the script that happens to land on a 512 Byte boundary (from file start or from a callsite) could be misinterpreted as a label +@REM -- It is unknown what versions of cmd interpreters behave this way - and pmix scriptwrap.checkfile doesn't check all such boundaries. +@REm -- For this reason, batch labels should be chosen to be relatively unlikely to collide with other strings in the file, and simple names such as :exit or :end should probably be avoided +@REM ############################################################################################################################ +@REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@REM @ECHO commandlineascalled %0 +@REM @ECHO commandlineresolved %~f0 +@CALL :getNormalizedScriptTail nftail +@REM @ECHO normalizedscripttail %nftail% +@CALL :getFileTail %0 clinetail +@REM @ECHO clinetail %clinetail% +@CALL :stringToUpper %~nx0 capscripttail +@REM @ECHO capscriptname: %capscripttail% + +@IF "%nftail%"=="%capscripttail%" ( + @ECHO forcing asadmin=1 due to file name on filesystem being uppercase + @SET "asadmin=1" +) else ( + @CALL :stringToUpper %clinetail% capcmdlinetail + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( + @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase + @set "asadmin=1" + ) +) +@SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" +@SET arglist=%* +@IF "%1"=="PUNK-ELEVATED" ( + GOTO :gotPrivileges +) +@IF !asadmin!==1 ( + net file 1>NUL 2>NUL + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) +) +@GOTO skip_privileges +:getPrivileges +@IF '%1'=='PUNK-ELEVATED' (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) +@ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" +@ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" +@ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" +@ECHO args = args ^& strArg ^& " " >> "%vbsGetPrivileges%" +@ECHO Next >> "%vbsGetPrivileges%" +@ECHO UAC.ShellExecute "%~dp0%~n0%~x0", args, "", "runas", 1 >> "%vbsGetPrivileges%" +@ECHO Launching script in new windows due to administrator elevation +@"%SystemRoot%\System32\WScript.exe" "%vbsGetPrivileges%" %* +@EXIT /B + +:gotPrivileges +@REM setlocal & pushd . +@PUSHD . +@cd /d %~dp0 +@IF "%1"=="PUNK-ELEVATED" ( + @DEL "%vbsGetPrivileges%" 1>nul 2>nul + @SET arglist=%arglist:~14% +) + +:skip_privileges +@SET need_ps1=0 +@REM we want the ps1 to exist even if the nextshell isn't powershell +@if not exist "%~dp0%~n0.ps1" ( + @SET need_ps1=1 +) ELSE ( + fc "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >nul || goto different + @REM @ECHO "files same" + @SET need_ps1=0 +) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 +:pscontinue +@IF !need_ps1!==1 ( + COPY "%~dp0%~n0%~x0" "%~dp0%~n0.ps1" >NUL +) +@REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? +@IF "!shells[%nextshell%]!"=="pwsh" ( + REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time + REM test availability of preferred option of powershell7+ pwsh + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + REM fallback to powershell if pwsh failed + IF !pwshtest_exitcode!==0 ( + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; "%~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM CALL powershell -nop -nol -c write-host powershell-found + REM powershell -nop -nol -file "%~dp0%~n0.ps1" %* + powershell -nop -nol -c set-executionpolicy -Scope Process Unrestricted; %~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF "!shells[%nextshell%]!"=="bash" ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + !shells[%nextshell%]! "!wslpath!%fname%" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + !shells[%nextshell%]! "%~dp0%fname%" %arglist% + SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value ^(%nextshell%^) !shells[%nextshell%]! valid options are %validshells% + SET task_exitcode=66 + @REM boundary padding + GOTO :exit_multishell + ) + ) +) +@REM batch file library functions +@REM boundary padding +@GOTO :endlib + +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@EXIT /B + +:getFileTail +@REM return tail of file without any normalization e.g c:/punkshell/bin/Punk.cmd returns Punk.cmd even if file is punk.cmd +@REM we can't use things such as %~nx1 as it can change capitalisation +@REM This function is designed explicitly to preserve capitalisation +@REM accepts full paths with either / or \ as delimiters - or +@SETLOCAL + @SET "rtrn=%~2" + @SET "arg=%~1" + @REM @SET "result=%_arg:*/=%" + @REM @SET "result=%~1" + @SET LF=^ + + + : The above 2 empty lines are important. Don't remove + @CALL :stringContains "!arg!" "\" hasBackSlash + @IF "!hasBackslash!"=="true" ( + @for %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:\=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @CALL :stringContains "!arg!" "/" hasForwardSlash + @IF "!hasForwardSlash!"=="true" ( + @FOR %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:/=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @set "result=%arg%" + ) + ) +@ENDLOCAL & ( + @if "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B +@REM boundary padding +@REM boundary padding +:getNormalizedScriptTail +@SETLOCAL + @SET "result=%~nx0" + @SET "rtrn=%~1" +@ENDLOCAL & ( + @IF "%~1" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:getNormalizedFileTailFromPath +@REM warn via echo, and do not set return variable if path not found +@REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding +@REM boundary padding +@REM boundary padding +@SETLOCAL + @CALL :stringContains %~1 "\" hasBackSlash + @CALL :stringContains %~1 "/" hasForwardSlash + @IF "%hasBackslash%-%hasForwardslash%"=="false-false" ( + @SET "P=%cd%%~1" + @CALL :getNormalizedFileTailFromPath "!P!" ftail2 + @SET "result=!ftail2!" + ) else ( + @IF EXIST "%~1" ( + @SET "result=%~nx1" + ) else ( + @ECHO error getNormalizedFileTailFromPath file not found: %~1 + @EXIT /B 1 + ) + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + @ECHO getNormalizedFileTailFromPath %1 result: %result% + ) +) +@EXIT /B + +:stringContains +@REM usage: @CALL:stringContains string needle returnvarname +@SETLOCAL + @SET "rtrn=%~3" + @SET "string=%~1" + @SET "needle=%~2" + @IF "!string:%needle%=!"=="!string!" @( + @SET "result=false" + ) ELSE ( + @SET "result=true" + ) +@ENDLOCAL & ( + @IF "%~3" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringContains %string% %needle% result: %result% + ) +) +@EXIT /B + +:stringToUpper +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "capstring=%~1" + @FOR %%A in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) DO @( + @SET "capstring=!capstring:%%A=%%A!" + ) + @SET "result=!capstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringToUpper %string% result: %result% + ) +) +@EXIT /B + +:isNumeric +@SETLOCAL + @SET "notnumeric="&FOR /F "delims=0123456789" %%i in ("%1") do set "notnumeric=%%i" + @IF defined notnumeric ( + @SET "result=false" + ) else ( + @SET "result=true" + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:endlib +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit_multishell +@GOTO :exit_multishell +# } +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore +Hide :exit_multishell;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + +# +# + + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : # +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust the %nextshell% value above +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +exitcode=0 +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "sh/bash reporting tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- Perl script section +# -- leave the script below as is, if all that is required is launching the Tcl payload" +# -- +# -- Note that perl script isn't called by default when simply running this script by name +# -- adjust the nextshell value at the top of the script to point to perl +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +=cut +#!/user/bin/perl +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin perl Payload +my $exit_code = 0; +#use ExtUtils::Installed; +#my $installed = ExtUtils::Installed->new(); +#my @modules = $installed->modules(); +#print "Modules:\n"; +#foreach my $m (@modules) { +# print "$m\n"; +#} +# -- --- --- + + + +my $scriptname = $0; +print "perl $scriptname\n"; +my $i =1; +foreach my $a(@ARGV) { + print "Arg # $i: $a\n"; +} + +# +# + + + +# -- --- --- --- --- --- --- --- +# +$exit_code=system("tclsh", $scriptname, @ARGV); +#print "perl reporting tcl exitcode: $exit_code"; +# +# -- --- --- --- --- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end perl Payload +exit $exit_code; +__END__ + +# end hide sh/bash/perl block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- Do not edit if current file is the .ps1 +# -- Edit the corresponding .cmd and it will autocopy +# -- unbalanced braces { } here *even in comments* will cause problems if there was no Tcl exit or return above +# -- custom script should generally go below the begin_powershell_payload line +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = GetScriptName +function GetDynamicParamDictionary { + [CmdletBinding()] + param( + [Parameter(ValueFromPipeline=$true, Mandatory=$true)] + [string] $CommandName + ) + + begin { + # Get a list of params that should be ignored (they're common to all advanced functions) + $CommonParameterNames = [System.Runtime.Serialization.FormatterServices]::GetUninitializedObject([type] [System.Management.Automation.Internal.CommonParameters]) | + Get-Member -MemberType Properties | + Select-Object -ExpandProperty Name + } + + process { + # Create the dictionary that this scriptblock will return: + $DynParamDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary + + # Convert to object array and get rid of Common params: + (Get-Command $CommandName | select -exp Parameters).GetEnumerator() | + Where-Object { $CommonParameterNames -notcontains $_.Key } | + ForEach-Object { + $DynamicParameter = New-Object System.Management.Automation.RuntimeDefinedParameter ( + $_.Key, + $_.Value.ParameterType, + $_.Value.Attributes + ) + $DynParamDictionary.Add($_.Key, $DynamicParameter) + } + + # Return the dynamic parameters + return $DynParamDictionary + } +} +# GetDynamicParamDictionary +# - This can make it easier to share a single set of param definitions between functions +# - sample usage +#function ParameterDefinitions { +# param( +# [Parameter(Mandatory)][string] $myargument +# ) +#} +#function psmain { +# [CmdletBinding()] +# param() +# dynamicparam { GetDynamicParamDictionary ParameterDefinitions } +# process { +# #called once with $PSBoundParameters dictionary +# #can be used to validate arguments, or set a simpler variable name for access +# switch ($PSBoundParameters.keys) { +# 'myargumentname' { +# Set-Variable -Name $_ -Value $PSBoundParameters."$_" +# } +# #... +# } +# foreach ($boundparam in $PSBoundParameters.GetEnumerator()) { +# #... +# } +# } +# end { +# #Main function logic +# Write-Host "myargumentname value is: $myargumentname" +# #myotherfunction @PSBoundParameters +# } +#} +#psmain @args +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +# +# -- --- --- --- --- --- --- --- + + +# +# + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: comment end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell +:exit_multishell +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@IF "%1"=="PUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: comment end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +no script engine should try to run me +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell1.cmd b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell1.cmd new file mode 100644 index 00000000..17fe4c15 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell1.cmd @@ -0,0 +1,524 @@ +: "[rename set s;proc Hide x {proc $x args {}};Hide :]" "\$(function : {<#pwsh#>})" ^ +set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershell;proc Hide x {proc $x args {}}; Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' +: heredoc1 - hide from powershell using @ and squote above. (close sqote for unix shells) ' \ +: .bat/.cmd launch section, leading colon hides from cmd, trailing slash hides next line from tcl \ +: "[Hide @ECHO; Hide ); Hide (;Hide echo; Hide @REM]#not necessary but can help avoid errs in testing" +: << 'HEREDOC1B_HIDE_FROM_BASH_AND_SH' +: Continuation char at end of this line and rem with curly-braces used to exlude Tcl from the whole cmd block \ +: { +: STRONG SUGGESTION: DO NOT MODIFY FIRST LINE OF THIS SCRIPT. shebang #! line is not required on unix or windows and will reduce functionality and/or portability. +: Even comment lines can be part of the functionality of this script (both on unix and windows) - modify with care. +@REM ############################################################################################################################ +@REM THIS IS A POLYGLOT SCRIPT - supporting payloads in Tcl, bash, sh and/or powershelll (powershell.exe or pwsh.exe) +@REM It should remain portable between unix-like OSes & windows if the proper structure is maintained. +@REM ############################################################################################################################ +@REM On windows, change the value of nextshell to one of the listed 2 digit values if desired, and add code within payload sections for tcl,sh,bash,powershell as appropriate. +@REM This wrapper can be edited manually (carefully!) - or sh,bash,tcl,powershell scripts can be wrapped using the Tcl-based punkshell system +@REM e.g from within a running punkshell: pmix scriptwrap.multishell -outputfolder +@REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) +@REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. +@REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshells= ^(10^) 'pwsh' ^(11^) 'sh' (^12^) 'bash' (^13^) 'tclsh'" +@SET "shells[10]=pwsh" +@SET "shells[11]=sh" +@set "shells[12]=bash" +@SET "shells[13]=tclsh" +: +@SET "nextshell=13" +: +@rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). +: +@SET "asadmin=0" +: +@REM nextshell set to index for validshells .eg 10 for pwsh +@REM @ECHO nextshell is %nextshell% +@SET "selected=!shells[%nextshell%]!" +@REM @ECHO selected %selected% +@CALL SET "keyRemoved=%%validshells:'!selected!'=%%" +@REM @ECHO keyremoved %keyRemoved% +@REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@REM -- cmd/batch file section (ignored on unix but should be left in place) +@REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. +@REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 +@REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. +@REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 +@REM ############################################################################################################################ +@REM -- Due to this issue -seemingly trivial edits of the batch file section can break the script! (for Windows anyway) +@REM -- Even something as simple as adding or removing an @REM +@REM -- From within punkshell - use: +@REM -- pmix scriptwrap.checkfile +@REM -- to check your templates or final wrapped scripts for byte boundary issues +@REM -- It will report any labels that are on boundaries +@REM -- This is why the nextshell value above is a 2 digit key instead of a string - so that editing the value doesn't change the byte offsets. +@REM -- Editing your sh,bash,tcl,pwsh payloads is much less likely to cause an issue. There is the possibility of the final batch :exit_multishell label spanning a boundary - so testing using pmix scriptwrap.checkfile is still recommended. +@REM -- Alternatively, as you should do anyway - test the final script on windows +@REM -- Aside from adding comments/whitespace to tweak the location of labels - you can try duplicating the label (e.g just add the label on a line above) but this is not guaranteed to work in all situations. +@REM -- '@REM' is a safer comment mechanism than a leading colon - which is used sparingly here. +@REM -- A colon anywhere in the script that happens to land on a 512 Byte boundary (from file start or from a callsite) could be misinterpreted as a label +@REM -- It is unknown what versions of cmd interpreters behave this way - and pmix scriptwrap.checkfile doesn't check all such boundaries. +@REm -- For this reason, batch labels should be chosen to be relatively unlikely to collide with other strings in the file, and simple names such as :exit or :end should probably be avoided +@REM ############################################################################################################################ +@REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections +@REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### +@SET "winpath=%~dp0" +@SET "fname=%~nx0" +@REM @ECHO fname %fname% +@REM @ECHO winpath %winpath% +@REM @ECHO commandlineascalled %0 +@REM @ECHO commandlineresolved %~f0 +@CALL :getNormalizedScriptTail nftail +@REM @ECHO normalizedscripttail %nftail% +@CALL :getFileTail %0 clinetail +@REM @ECHO clinetail %clinetail% +@CALL :stringToUpper %~nx0 capscripttail +@REM @ECHO capscriptname: %capscripttail% + +@IF "%nftail%"=="%capscripttail%" ( + @ECHO forcing asadmin=1 due to file name on filesystem being uppercase + @SET "asadmin=1" +) else ( + @CALL :stringToUpper %clinetail% capcmdlinetail + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( + @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase + @set "asadmin=1" + ) +) +@SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" +@SET arglist=%* +@IF "%1"=="PUNK-ELEVATED" ( + GOTO :gotPrivileges +) +@IF !asadmin!==1 ( + net file 1>NUL 2>NUL + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) +) +@GOTO skip_privileges +:getPrivileges +@IF '%1'=='PUNK-ELEVATED' (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) +@ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" +@ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" +@ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" +@ECHO args = args ^& strArg ^& " " >> "%vbsGetPrivileges%" +@ECHO Next >> "%vbsGetPrivileges%" +@ECHO UAC.ShellExecute "%~dp0%~n0.cmd", args, "", "runas", 1 >> "%vbsGetPrivileges%" +@ECHO Launching script in new windows due to administrator elevation +@"%SystemRoot%\System32\WScript.exe" "%vbsGetPrivileges%" %* +@EXIT /B + +:gotPrivileges +@REM setlocal & pushd . +@PUSHD . +@cd /d %~dp0 +@IF "%1"=="PUNK-ELEVATED" ( + @DEL "%vbsGetPrivileges%" 1>nul 2>nul + @SET arglist=%arglist:~14% +) + +:skip_privileges +@SET need_ps1=0 +@REM we want the ps1 to exist even if the nextshell isn't powershell +@if not exist "%~dp0%~n0.ps1" ( + @SET need_ps1=1 +) ELSE ( + fc "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >nul || goto different + @REM @ECHO "files same" + @SET need_ps1=0 +) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 +:pscontinue +@IF !need_ps1!==1 ( + COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL +) +@REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? +@IF "!shells[%nextshell%]!"=="pwsh" ( + REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time + REM test availability of preferred option of powershell7+ pwsh + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL + SET pwshtest_exitcode=!errorlevel! + REM ECHO pwshtest_exitcode !pwshtest_exitcode! + REM fallback to powershell if pwsh failed + IF !pwshtest_exitcode!==0 ( + pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; "%~dp0%~n0.ps1" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + REM CALL powershell -nop -nol -c write-host powershell-found + REM powershell -nop -nol -file "%~dp0%~n0.ps1" %* + powershell -nop -nol -c set-executionpolicy -Scope Process Unrestricted; %~dp0%~n0.ps1" %arglist% + SET task_exitcode=!errorlevel! + ) +) ELSE ( + IF "!shells[%nextshell%]!"=="bash" ( + CALL :getWslPath %winpath% wslpath + REM ECHO wslfullpath "!wslpath!%fname%" + !shells[%nextshell%]! "!wslpath!%fname%" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + REM probably tclsh or sh + IF NOT "x%keyRemoved%"=="x%validshells%" ( + REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl + REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx + !shells[%nextshell%]! "%~dp0%fname%" %arglist% & SET task_exitcode=!errorlevel! + ) ELSE ( + ECHO %fname% has invalid nextshell value ^(%nextshell%^) !shells[%nextshell%]! valid options are %validshells% + SET task_exitcode=66 + GOTO :exit_multishell + ) + ) +) +@REM batch file library functions +@GOTO :endlib + +:getWslPath +@SETLOCAL + @SET "_path=%~p1" + @SET "name=%~nx1" + @SET "drive=%~d1" + @SET "rtrn=%~2" + @SET "result=/mnt/%drive:~0,1%%_path:\=/%%name%" +@ENDLOCAL & ( + @if "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + ECHO %result% + ) +) +@EXIT /B + +:getFileTail +@REM return tail of file without any normalization e.g c:/punkshell/bin/Punk.cmd returns Punk.cmd even if file is punk.cmd +@REM we can't use things such as %~nx1 as it can change capitalisation +@REM This function is designed explicitly to preserve capitalisation +@REM accepts full paths with either / or \ as delimiters - or +@SETLOCAL + @SET "rtrn=%~2" + @SET "arg=%~1" + @REM @SET "result=%_arg:*/=%" + @REM @SET "result=%~1" + @SET LF=^ + + + : The above 2 empty lines are important. Don't remove + @CALL :stringContains "!arg!" "\" hasBackSlash + @IF "!hasBackslash!"=="true" ( + @for %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:\=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @CALL :stringContains "!arg!" "/" hasForwardSlash + @IF "!hasForwardSlash!"=="true" ( + @FOR %%A in ("!LF!") do @( + @FOR /F %%B in ("!arg:/=%%~A!") do @set "result=%%B" + ) + ) ELSE ( + @set "result=%arg%" + ) + ) +@ENDLOCAL & ( + @if "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B +@REM boundary padding +:getNormalizedScriptTail +@SETLOCAL + @SET "result=%~nx0" + @SET "rtrn=%~1" +@ENDLOCAL & ( + @IF "%~1" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:getNormalizedFileTailFromPath +@REM warn via echo, and do not set return variable if path not found +@REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding +@SETLOCAL + @CALL :stringContains %~1 "\" hasBackSlash + @CALL :stringContains %~1 "/" hasForwardSlash + @IF "%hasBackslash%-%hasForwardslash%"=="false-false" ( + @SET "P=%cd%%~1" + @CALL :getNormalizedFileTailFromPath "!P!" ftail2 + @SET "result=!ftail2!" + ) else ( + @IF EXIST "%~1" ( + @SET "result=%~nx1" + ) else ( + @ECHO error getNormalizedFileTailFromPath file not found: %~1 + @EXIT /B 1 + ) + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + SET "%rtrn%=%result%" + ) ELSE ( + @ECHO getNormalizedFileTailFromPath %1 result: %result% + ) +) +@EXIT /B + +:stringContains +@REM usage: @CALL:stringContains string needle returnvarname +@SETLOCAL + @SET "rtrn=%~3" + @SET "string=%~1" + @SET "needle=%~2" + @IF "!string:%needle%=!"=="!string!" @( + @SET "result=false" + ) ELSE ( + @SET "result=true" + ) +@ENDLOCAL & ( + @IF "%~3" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringContains %string% %needle% result: %result% + ) +) +@EXIT /B + +:stringToUpper +@SETLOCAL + @SET "rtrn=%~2" + @SET "string=%~1" + @SET "capstring=%~1" + @FOR %%A in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) DO @( + @SET "capstring=!capstring:%%A=%%A!" + ) + @SET "result=!capstring!" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO stringToUpper %string% result: %result% + ) +) +@EXIT /B + +:isNumeric +@SETLOCAL + @SET "notnumeric="&FOR /F "delims=0123456789" %%i in ("%1") do set "notnumeric=%%i" + @IF defined notnumeric ( + @SET "result=false" + ) else ( + @SET "result=true" + ) + @SET "rtrn=%~2" +@ENDLOCAL & ( + @IF "%~2" neq "" ( + @SET "%rtrn%=%result%" + ) ELSE ( + @ECHO %result% + ) +) +@EXIT /B + +:endlib +: \ +@REM @SET taskexit_code=!errorlevel! & goto :exit_multishell +@GOTO :exit_multishell +# } +# -*- tcl -*- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- tcl script section +# -- This is a punk multishell file +# -- Primary payload target is Tcl, with sh,bash,powershell as helpers +# -- but it may equally be used with any of these being the primary script. +# -- It is tuned to run when called as a batch file, a tcl script a sh/bash script or a pwsh/powershell script +# -- i.e it is a polyglot file. +# -- The specific layout including some lines that appear just as comments is quite sensitive to change. +# -- It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# -- e.g ./filename.polypunk.cmd in sh or bash +# -- e.g tclsh filename.cmd +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore +Hide :exit_multishell;Hide {<#};Hide '@ +namespace eval ::punk::multishell { + set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script [file dirname [file normalize [info script]/__]] + if {[info exists argv0] && + $last_script eq $last_script_root + } { + set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode + } else { + set ::punk::multishell::is_main($last_script) 0 ;#sourced - likely to be being used as a library - no launch, no exit. Can use return. + } + if {"::punk::multishell::is_main" ni [info commands ::punk::multishell::is_main]} { + proc ::punk::multishell::is_main {{script_name {}}} { + if {$script_name eq ""} { + set script_name [file dirname [file normalize [info script]/--]] + } + if {![info exists ::punk::multishell::is_main($script_name)]} { + #e.g a .dll or something else unanticipated + puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" + puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + return 0 + } + return [set ::punk::multishell::is_main($script_name)] + } + } +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" +#puts "argv0 : $::argv0" +# -- --- --- --- --- --- --- --- --- --- --- --- + + +# +# + + + +# -- --- --- --- --- --- --- --- --- --- --- --- +# -- Best practice is to always return or exit above, or just by leaving the below defaults in place. +# -- If the multishell script is modified to have Tcl below the Tcl Payload section, +# -- then Tcl bracket balancing needs to be carefully managed in the shell and powershell sections below. +# -- Only the # in front of the two relevant if statements below needs to be removed to enable Tcl below +# -- but the sh/bash 'then' and 'fi' would also need to be uncommented. +# -- This facility left in place for experiments on whether configuration payloads etc can be appended +# -- to tail of file - possibly binary with ctrl-z char - but utility is dependent on which other interpreters/shells +# -- can be made to ignore/cope with such data. +if {[::punk::multishell::is_main]} { + exit 0 +} else { + return +} +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +# end hide from unix shells \ +HEREDOC1B_HIDE_FROM_BASH_AND_SH +# sh/bash \ +shift && set -- "${@:1:$#-1}" +#------------------------------------------------------ +# -- This if block only needed if Tcl didn't exit or return above. +if false==false # else { + then + : # +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- sh/bash script section +# -- leave as is if all that is required is launching the Tcl payload" +# -- +# -- Note that sh/bash script isn't called when running a .bat/.cmd from cmd.exe on windows by default +# -- adjust @call line above ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate +# -- if sh/bash scripting needs to run on windows too. +# -- +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin sh Payload +#printf "start of bash or sh code" + +# +# + +# -- --- --- --- --- --- --- --- +# +exitcode=0 ;#default assumption +#-- sh/bash launches Tcl here instead of shebang line at top +#-- use exec to use exitcode (if any) directly from the tcl script +#exec /usr/bin/env tclsh "$0" "$@" +#-- alternative - can run sh/bash script after the tcl call. +/usr/bin/env tclsh "$0" "$@" +exitcode=$? +#echo "tcl exitcode: ${exitcode}" +#-- override exitcode example +#exit 66 +# +# -- --- --- --- --- --- --- --- + +# +# + + +#printf "sh/bash done \n" +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end sh Payload +#------------------------------------------------------ +fi +exit ${exitcode} +# end hide sh/bash block from Tcl +# This comment with closing brace should stay in place whether if commented or not } +#------------------------------------------------------ +# begin hide powershell-block from Tcl - only needed if Tcl didn't exit or return above +if 0 { +: end heredoc1 - end hide from powershell \ +'@ +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +# -- powershell/pwsh section +# -- Do not edit if current file is the .ps1 +# -- Edit the corresponding .cmd and it will autocopy +# -- unbalanced braces { } here *even in comments* will cause problems if there was no Tcl exit or return above +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### +function GetScriptName { $myInvocation.ScriptName } +$scriptname = getScriptName +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---begin powershell Payload +#"Timestamp : {0,10:yyyy-MM-dd HH:mm:ss}" -f $(Get-Date) | write-host +#"Script Name : {0}" -f $scriptname | write-host +#"Powershell Version: {0}" -f $PSVersionTable.PSVersion.Major | write-host +#"powershell args : {0}" -f ($args -join ", ") | write-host +# -- --- --- --- + +# +# + + +# -- --- --- --- --- --- --- --- +# +tclsh $scriptname $args +# +# -- --- --- --- --- --- --- --- + + +# +# + +# -- --- --- --- --- --- --- --- --- --- --- --- --- ---end powershell Payload +#"powershell reporting exitcode: {0}" -f $LASTEXITCODE | write-host +Exit $LASTEXITCODE +# heredoc2 for powershell to ignore block below +$1 = @' +' +: comment end hide powershell-block from Tcl \ +# This comment with closing brace should stay in place whether 'if' commented or not } +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell +:exit_multishell +: \ +@REM @ECHO exitcode: !task_exitcode! +: \ +@IF "%1"=="PUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) +: \ +@EXIT /B !task_exitcode! +# cmd has exited +: comment end heredoc2 \ +'@ +<# +# id:tailblock0 +# -- powershell multiline comment +#> +<# +# id:tailblock1 +# + +# +# -- unreachable by tcl directly if ctrl-z character is in the section above. (but file can be read and split on \x1A) +# -- Potential for zip and/or base64 contents, but we can't stop pwsh parser from slurping in the data +# -- so for example a plain text tar archive could cause problems depending on the content. +# -- final line in file must be the powershell multiline comment terminator or other data it can handle. +# -- e.g plain # comment lines will work too +# -- (for example a powershell digital signature is a # commented block of data at the end of the file) +#> + + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-shellbat.bat b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-shellbat.bat new file mode 100644 index 00000000..aa9039a9 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/scriptappwrappers/punk-shellbat.bat @@ -0,0 +1,112 @@ +: "[proc : args {}]" ;# *tcl shellbat - call with sh,bash,tclsh on any platform, or with cmd on windows. +: <<'HIDE_FROM_BASH_AND_SH' +: ;# leading colon hides from .bat, trailing slash hides next line from tcl \ +@call tclsh "%~dp0%~n0.bat" %* +: ;#\ +@set taskexitcode=%errorlevel% & goto :exit +# -*- tcl -*- +# ################################################################################################# +# This is a tcl shellbat file +# It is tuned to run when called as a batch file, a tcl script, an sh script or a bash script, +# so the specific layout and characters used are quite sensitive to change. +# It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# e.g ./filename.sh.bat in sh or bash or powershell +# e.g filename.sh or filename.sh.bat at windows command prompt +# e.g tclsh filename.sh.bat | sh filename.sh.bat | bash filename.sh.bat +# In all cases an arbitrary number of arguments are accepted +# To avoid the initial commandline on stdout when calling as a batch file on windows, use: +# cmd /Q /c filename.sh.bat +# (because we cannot use @if to silence it, as this isn't understood by tcl,sh or bash) +# ################################################################################################# +#fconfigure stdout -translation crlf +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" + + +# +# + +# --- --- --- --- --- --- --- --- --- --- --- --- --- +# only exit if needed. see exitcode notes at bottom of file and exit there for consistency across invocation methods +# --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +#-- +#-- bash/sh code follows. +#-- protect from tcl using line continuation char on the previous comment for each line, like so: \ +printf "etc" +#-- or alternatively place sh/bash script within the false==false block +#-- whilst being careful to balance braces {} +#-- For more complex needs you should call out to external scripts +#-- +#-- END marker for hide_from_bash_and_sh\ +HIDE_FROM_BASH_AND_SH + +#--------------------------------------------------------- +#-- This if statement hides(mostly) a sh/bash code block from Tcl +if false==false # else { +then +: +#--------------------------------------------------------- + #-- leave as is if all that's required is launching the Tcl payload" + #-- + #-- Note that sh/bash script isn't called when running a .bat from cmd.exe on windows by default + #-- adjust line 4: @call tclsh ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate + #-- if sh/bash scripting needs to run on windows too. + #-- + #printf "start of bash or sh code" + + # + # + + + #-- sh/bash launches Tcl here instead of shebang line at top + # + #-- use exec to use exitcode (if any) directly from the tcl script + exec /usr/bin/env tclsh "$0" "$@" + # + + #-- alternative - if sh/bash script required to run after the tcl call. + #/usr/bin/env tclsh "$0" "$@" + #tcl_exitcode=$? + #echo "tcl_exitcode: ${tcl_exitcode}" + + # + # + + #-- override exitcode example + #exit 66 + + #printf "No need for trailing slashes for sh/bash code here\n" +#--------------------------------------------------------- +fi +# closing brace for Tcl } +#--------------------------------------------------------- + +#-- tcl and shell script now both active + +#-- comment for line sample 1 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 1 \n" + +#-- comment for line sample 2 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 2 \n" + + +#-- Consistent exitcode from sh,bash,tclsh or cmd +#-- Call exit in tcl (or sh/bash) code only if explicitly required, otherwise leave this commented out. +#-- (script might be more widely useable without explicit exit. e.g in tcl: set ::argc 1; set ::argv "val"; source filename.sh.bat ) +#-- exit line unprotected by trailing slash will work for tcl and/or sh/bash +#exit 0 +#exit 42 + + + +#-- make sure sh/bash/tcl all skip over .bat style exit \ +: <<'shell_end' +#-- .bat exit with exitcode from tcl process \ +:exit +: ;# \ +@exit /B %taskexitcode% +# .bat has exited \ +shell_end + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat.txt b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat.txt new file mode 100644 index 00000000..25c7d1d8 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat.txt @@ -0,0 +1,104 @@ +: "[proc : args {}]" ;# *tcl shellbat - call with sh,bash,tclsh on any platform, or with cmd on windows. +: <<'HIDE_FROM_BASH_AND_SH' +: ;# leading colon hides from .bat, trailing slash hides next line from tcl \ +@call tclsh "%~dp0%~n0.bat" %* +: ;#\ +@set taskexitcode=%errorlevel% & goto :exit +# -*- tcl -*- +# ################################################################################################# +# This is a tcl shellbat file +# It is tuned to run when called as a batch file, a tcl script, an sh script or a bash script, +# so the specific layout and characters used are quite sensitive to change. +# It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# e.g ./filename.sh.bat in sh or bash or powershell +# e.g filename.sh or filename.sh.bat at windows command prompt +# e.g tclsh filename.sh.bat | sh filename.sh.bat | bash filename.sh.bat +# In all cases an arbitrary number of arguments are accepted +# To avoid the initial commandline on stdout when calling as a batch file on windows, use: +# cmd /Q /c filename.sh.bat +# (because we cannot use @if to silence it, as this isn't understood by tcl,sh or bash) +# ################################################################################################# +#fconfigure stdout -translation crlf +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" + + +# + +# --- --- --- --- --- --- --- --- --- --- --- --- --- +# only exit if needed. see exitcode notes at bottom of file and exit there for consistency across invocation methods +# --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +#-- +#-- bash/sh code follows. +#-- protect from tcl using line continuation char on the previous comment for each line, like so: \ +printf "etc" +#-- or alternatively place sh/bash script within the false==false block +#-- whilst being careful to balance braces {} +#-- For more complex needs you should call out to external scripts +#-- +#-- END marker for hide_from_bash_and_sh\ +HIDE_FROM_BASH_AND_SH + +#--------------------------------------------------------- +#-- This if statement hides(mostly) a sh/bash code block from Tcl +if false==false # else { +then +: +#--------------------------------------------------------- + #-- leave as is if all that's required is launching the Tcl payload" + #-- + #-- Note that sh/bash script isn't called when running a .bat from cmd.exe on windows by default + #-- adjust line 4: @call tclsh ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate + #-- if sh/bash scripting needs to run on windows too. + #-- + #printf "start of bash or sh code" + + + #-- sh/bash launches Tcl here instead of shebang line at top + + #-- use exec to use exitcode (if any) directly from the tcl script + exec /usr/bin/env tclsh "$0" "$@" + + #-- alternative - if sh/bash script required to run after the tcl call. + #/usr/bin/env tclsh "$0" "$@" + #tcl_exitcode=$? + #echo "tcl_exitcode: ${tcl_exitcode}" + + #-- override exitcode example + #exit 66 + + #printf "No need for trailing slashes for sh/bash code here\n" +#--------------------------------------------------------- +fi +# closing brace for Tcl } +#--------------------------------------------------------- + +#-- tcl and shell script now both active + +#-- comment for line sample 1 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 1 \n" + +#-- comment for line sample 2 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 2 \n" + + +#-- Consistent exitcode from sh,bash,tclsh or cmd +#-- Call exit in tcl (or sh/bash) code only if explicitly required, otherwise leave this commented out. +#-- (script might be more widely useable without explicit exit. e.g in tcl: set ::argc 1; set ::argv "val"; source filename.sh.bat ) +#-- exit line unprotected by trailing slash will work for tcl and/or sh/bash +#exit 0 +#exit 42 + + + +#-- make sure sh/bash/tcl all skip over .bat style exit \ +: <<'shell_end' +#-- .bat exit with exitcode from tcl process \ +:exit +: ;# \ +@exit /B %taskexitcode% +# .bat has exited \ +shell_end + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat_v1.txt b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat_v1.txt new file mode 100644 index 00000000..e504ee01 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/shellbat_v1.txt @@ -0,0 +1,106 @@ +if (true=="shellbat") #;#\ +: <<'HIDE_FROM_BASH_AND_SH' +::lindex tcl;# leading colons hide from .bat, trailing slash hides next line from tcl \ +@call tclsh "%~dp0%~n0.bat" %* +::lindex tcl;#\ +@set taskexitcode=%errorlevel% & goto :exit +# -*- tcl -*- +# ################################################################################################# +# This is a tcl shellbat file +# It is tuned to run when called as a batch file, a tcl script, an sh script or a bash script, +# so the specific layout and characters used are quite sensitive to change. +# It can be called on unix or windows platforms with or without the interpreter being specified on the commandline. +# e.g ./filename.sh.bat in sh or bash or powershell +# e.g filename.sh or filename.sh.bat at windows command prompt +# e.g tclsh filename.sh.bat | sh filename.sh.bat | bash filename.sh.bat +# In all cases an arbitrary number of arguments are accepted +# To avoid the initial commandline on stdout when calling as a batch file on windows, use: +# cmd /Q /c filename.sh.bat +# (because we cannot use @if to silence it, as this isn't understood by tcl,sh or bash) +# ################################################################################################# +#fconfigure stdout -translation crlf +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl Payload +#puts "script : [info script]" +#puts "argcount : $::argc" +#puts "argvalues: $::argv" + +# + +# --- --- --- --- --- --- --- --- --- --- --- --- --- +# only exit if needed. see exitcode notes at bottom of file and exit there for consistency across invocation methods +# --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl Payload +#-- +#-- bash/sh code follows. +#-- protect from tcl using line continuation char on the previous comment for each line, like so: \ +printf "etc" +#-- or alternatively place sh/bash script within the false==false block +#-- whilst being careful to balance braces {} +#-- For more complex needs you should call out to external scripts +#-- +#-- END marker for hide_from_bash_and_sh\ +HIDE_FROM_BASH_AND_SH +#\ +then + +#--------------------------------------------------------- +if false==false # else { +then +: +#--------------------------------------------------------- + #-- leave as is if all that's required is launching the Tcl payload" + #-- + #-- Note that sh/bash script isn't called when running a .bat from cmd.exe on windows by default + #-- adjust line 4: @call tclsh ... to something like @call sh ... @call bash .. or @call env sh ... etc as appropriate + #-- if sh/bash scripting needs to run on windows too. + #-- + #printf "start of bash or sh code" + + + #-- sh/bash launches Tcl here instead of shebang line at top + + #-- use exec to use exitcode (if any) directly from the tcl script + exec /usr/bin/env tclsh "$0" "$@" + + #-- alternative - if sh/bash script required to run after the tcl call. + #/usr/bin/env tclsh "$0" "$@" + #tcl_exitcode=$? + #echo "tcl_exitcode: ${tcl_exitcode}" + + #-- override exitcode example + #exit 66 + + #printf "No need for trailing slashes for sh/bash code here\n" +#--------------------------------------------------------- +fi +# } +#--------------------------------------------------------- + +#-- comment for line sample 1 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 1 \n" + +#-- comment for line sample 2 with trailing continuation slash \ +#printf "tcl-invisible sh/bash line sample 2 \n" + + +#-- Consistent exitcode from sh,bash,tclsh or cmd +#-- Call exit in tcl (or sh/bash) code only if explicitly required, otherwise leave this commented out. +#-- (script might be more widely useable without explicit exit. e.g in tcl: set ::argc 1; set ::argv "val"; source filename.sh.bat ) +#-- exit line unprotected by trailing slash will work for tcl and/or sh/bash +#exit 0 +#exit 42 + + +#--------------------------------------------------------- +#-- end if true==shellbat on very first line\ +fi +#--------------------------------------------------------- + +#-- make sure sh/bash/tcl all skip over .bat style exit \ +: <<'shell_end' +#-- .bat exit with exitcode from tcl process \ +:exit +::lindex tcl;#\ +@exit /B %taskexitcode% +#\ +shell_end + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbatheader.txt b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbatheader.txt new file mode 100644 index 00000000..88326d54 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbatheader.txt @@ -0,0 +1,3 @@ +::lindex tcl;#\ +@call tclsh "%~dp0%~n0.bat" %* & goto :eof +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest.bat b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest.bat new file mode 100644 index 00000000..fd2e9511 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest.bat @@ -0,0 +1,8 @@ +::lindex tcl;#\ +@call tclsh "%~dp0%~n0.bat" %* & goto :eof +# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl +puts stdout "exe: [info nameof]" +puts stdout "scr: [info script]" +puts stdout "argc: $::argc" +puts stdout "argv: '$::argv'" + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest2.bat b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest2.bat new file mode 100644 index 00000000..4765515a --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/templates/utility/tclbattest2.bat @@ -0,0 +1,19 @@ +::set - { +@goto start +# -- tcl bat +:start +@echo off +set script=%0 +echo %* +if exist %script%.bat set script=%script%.bat +tclsh %script% %* +goto end of BAT file +};unset - ;# --- --- --- --- --- --- --- --- --- --- --- --- ---begin Tcl + +puts stdout "exe: [info nameof]" +puts stdout "scr: [info script]" +puts stdout "argc: $::argc" +puts stdout "argv: '$::argv'" + +# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---end Tcl\ +:end of BAT file diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/util-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/util-0.1.0.tm new file mode 100644 index 00000000..aca7eeed --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/mix/util-0.1.0.tm @@ -0,0 +1,365 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::mix::util 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz + +namespace eval punk::mix::util { + variable has_winpath 0 +} + +if {"windows" eq $::tcl_platform(platform)} { + if {![catch {package require punk::winpath}]} { + set punk::mix::util::has_winpath 1 + } +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::mix::util { + variable tmpfile_counter 0 ;#additional tmpfile collision avoidance + + namespace export * + + #NOTE fileutil::cat seems to silently ignore options if passed at end instead of before file! + proc fcat {args} { + variable has_winpath + + + set knownopts [list -eofchar -translation -encoding --] + set last_opt 0 + for {set i 0} {$i < [llength $args]} {incr i} { + set ival [lindex $args $i] + #puts stdout "i:$i a: $ival known: [expr {$ival in $knownopts}]" + if {$ival eq "--"} { + set last_opt $i + break + } else { + if {$ival in $knownopts} { + #puts ">known at $i : [lindex $args $i]" + if {($i % 2) != 0} { + error "unexpected option at index $i. known options: $knownopts must come in -opt val pairs." + } + incr i + set last_opt $i + } else { + set last_opt [expr {$i - 1}] + break + } + } + } + set first_non_opt [expr {$last_opt + 1}] + + #puts stderr "first_non_opt: $first_non_opt" + set opts [lrange $args -1 $first_non_opt-1] + set paths [lrange $args $first_non_opt end] + if {![llength $paths]} { + error "Unable to find file in the supplied arguments: $args. Ensure options are all -opt val pairs and that file name(s) follow" + } + + #puts stderr "opts: $opts paths: $paths" + + #let's proceed, but warn the user if an apparent option is in paths + foreach opt [list -encoding -eofchar -translation] { + if {$opt in $paths} { + puts stderr "fcat WARNING: apparent option $opt found after file argument(s) (expected them before filenames). Passing to fileutil::cat anyway - but for at least some versions, these options may be ignored. commandline 'fcat $args'" + } + } + + if {$::tcl_platform(platform) ne "windows"} { + return [fileutil::cat {*}$args] + } + + + set finalpaths [list] + foreach p $paths { + if {$has_winpath && [punk::winpath::illegalname_test $p]} { + lappend finalpaths [punk::winpath::illegalname_fix $p] + } else { + lappend finalpaths $p + } + } + fileutil::cat {*}$opts {*}$finalpaths + } + + #---------------------------------------- + namespace eval internal { + proc path_common_prefix_pop {varname} { + upvar 1 $varname var + set var [lassign $var head] + return $head + } + } + proc path_common_prefix {args} { + set dirs $args + set parts [file split [internal::path_common_prefix_pop dirs]] + while {[llength $dirs]} { + set r {} + foreach cmp $parts elt [file split [internal::path_common_prefix_pop dirs]] { + if {$cmp ne $elt} break + lappend r $cmp + } + set parts $r + } + if {[llength $parts]} { + return [file join {*}$parts] + } else { + return "" + } + } + + #retains case from first argument only - caseless comparison + proc path_common_prefix_nocase {args} { + set dirs $args + set parts [file split [internal::path_common_prefix_pop dirs]] + while {[llength $dirs]} { + set r {} + foreach cmp $parts elt [file split [internal::path_common_prefix_pop dirs]] { + if {![string equal -nocase $cmp $elt]} break + lappend r $cmp + } + set parts $r + } + if {[llength $parts]} { + return [file join {*}$parts] + } else { + return "" + } + } + #---------------------------------------- + + #namespace import ::punk::ns::nsimport_noclobber + + proc namespace_import_pattern_to_namespace_noclobber {pattern ns} { + set source_ns [namespace qualifiers $pattern] + if {![namespace exists $source_ns]} { + error "namespace_import_pattern_to_namespace_noclobber error namespace $source_ns not found" + } + if {![string match ::* $ns]} { + set nscaller [uplevel 1 {namespace current}] + set ns [punk::nsjoin $nscaller $ns] + } + set a_export_patterns [namespace eval $source_ns {namespace export}] + set a_commands [info commands $pattern] + set a_tails [lmap v $a_commands {namespace tail $v}] + set a_exported_tails [list] + foreach pattern $a_export_patterns { + set matches [lsearch -all -inline $a_tails $pattern] + foreach m $matches { + if {$m ni $a_exported_tails} { + lappend a_exported_tails $m + } + } + } + set imported_commands [list] + foreach e $a_exported_tails { + set imported [namespace eval $ns [string map [list $e $source_ns] { + set cmd "" + if {![catch {namespace import ::}]} { + set cmd + } + set cmd + }]] + if {[string length $imported]} { + lappend imported_commands $imported + } + } + return $imported_commands + } + + proc askuser {question} { + if {![catch {package require punk::lib}]} { + return [punk::lib::askuser $question] ;#takes account of terminal mode raw vs line (if punk::console used) + } + puts stdout $question + flush stdout + set stdin_state [fconfigure stdin] + fconfigure stdin -blocking 1 + set answer [gets stdin] + fconfigure stdin -blocking [dict get $stdin_state -blocking] + return $answer + } + + #review - can be surprising if caller unaware it uses try + proc do_in_path {path script} { + #from ::kettle::path::in + set here [pwd] + try { + cd $path + uplevel 1 $script + } finally { + cd $here + } + } + + proc foreach-file {path script_pathvariable script} { + upvar 1 $script_pathvariable thepath + + set known {} + lappend waiting $path + while {[llength $waiting]} { + set pending $waiting + set waiting {} + set at 0 + while {$at < [llength $pending]} { + set current [lindex $pending $at] + incr at + + # Do not follow into parent. + if {[string match *.. $current]} continue + + # Ignore what we have visited already. + set c [file dirname [file normalize $current/___]] + if {[dict exists $known $c]} continue + dict set known $c . + + if {[file tail $c] eq ".git"} { + continue + } + + # Expand directories. + if {[file isdirectory $c]} { + lappend waiting {*}[lsort -unique [glob -directory $c * .*]] + continue + } + + # Handle files as per the user's will. + set thepath $current + switch -exact -- [catch { uplevel 1 $script } result] { + 0 - 4 { + # ok, continue - nothing + } + 2 { + # return, abort, rethrow + return -code return + } + 3 { + # break, abort + return + } + 1 - default { + # error, any thing else - rethrow + return -code error $result + } + } + } + } + return + } + + proc is_valid_tm_version {versionpart} { + #Needs to be suitable for use with Tcl's 'package vcompare' + if {![catch [list package vcompare $versionpart $versionpart]]} { + return 1 + } else { + return 0 + } + } + #Note that semver only has a small overlap with tcl tm versions. + #todo - work out what overlap and whether it's even useful + #see also TIP #439: Semantic Versioning (tcl 9??) + proc semver {versionstring} { + set re {^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$} + } + #todo - semver conversion/validation for other systems? + proc magic_tm_version {} { + set magicbase 999999 ;#deliberately large so given load-preference when testing! + #we split the literal to avoid the literal appearing here - reduce risk of accidentally converting to a release version + return ${magicbase}.0a1.0 + } + + + + proc tmpfile {{prefix tmp_}} { + #note risk of collision if pregenerating a list of tmpfile names + #we will maintain an icrementing id so the caller doesn't have to bear that in mind + variable tmpfile_counter + global tcl_platform + return .punkutil_$prefix[pid]_[clock microseconds]_[incr tmpfile_counter]_[info hostname]_$tcl_platform(user) + } + + proc tmpdir {} { + # Taken from tcllib fileutil. + global tcl_platform env + + set attempdirs [list] + set problems {} + + foreach tmp {TEMP TMP TMPDIR} { + if { [info exists env($tmp)] } { + lappend attempdirs $env($tmp) + } else { + lappend problems "No environment variable $tmp" + } + } + + switch $tcl_platform(platform) { + windows { + lappend attempdirs "C:\\TEMP" "C:\\TMP" "\\TEMP" "\\TMP" + } + macintosh { + lappend attempdirs $env(TRASH_FOLDER) ;# a better place? + } + default { + lappend attempdirs \ + [file join / tmp] \ + [file join / var tmp] \ + [file join / usr tmp] + } + } + + lappend attempdirs [pwd] + + foreach tmp $attempdirs { + if { [file isdirectory $tmp] && + [file writable $tmp] } { + return [file normalize $tmp] + } elseif { ![file isdirectory $tmp] } { + lappend problems "Not a directory: $tmp" + } else { + lappend problems "Not writable: $tmp" + } + } + + # Fail if nothing worked. + return -code error "Unable to determine a proper directory for temporary files\n[join $problems \n]" + } + + + +} + + + + + + + + + + + + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::mix::util [namespace eval punk::mix::util { + variable version + set version 0.1.0 +}] +return diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm new file mode 100644 index 00000000..70f924d7 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm @@ -0,0 +1,1911 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2023 +# +# @@ Meta Begin +# Application punk::ns 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +##e.g package require frobz +package require punk::lib +package require punk::args + +tcl::namespace::eval ::punk_dynamic::ns { + +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::ns { + variable ns_current "::" + variable ns_re_cache [dict create] ;#cache regular expressions used in globmatchns + namespace export nsjoin nsprefix nstail nsparts nseval nsimport_noclobber corp + + #leading colon makes it hard (impossible?) to call directly if not within the namespace + proc ns/ {v {ns_or_glob ""} args} { + variable ns_current ;#change active ns of repl by setting ns_current + + set ns_caller [uplevel 1 {::namespace current}] + #puts stderr "ns_cur:$ns_current ns_call:$ns_caller" + + + set types [list all] + set nspathcommands 0 + if {$v eq "/"} { + set types [list children] + } + if {$v eq "///"} { + set nspathcommands 1 + } + + #todo - cooperate with repl? + set out "" + if {$ns_or_glob eq ""} { + set is_absolute 1 + set ns_queried $ns_current + set out [nslist [nsjoin $ns_current *] -types $types -nspathcommands $nspathcommands] + } else { + set is_absolute [string match ::* $ns_or_glob] + set has_globchars [regexp {[*?]} $ns_or_glob] + if {$is_absolute} { + if {!$has_globchars} { + if {![tcl::namespace::exists $ns_or_glob]} { + error "cannot change to namespace $ns_or_glob" + } + set ns_current $ns_or_glob + set ns_queried $ns_current + tailcall ns/ $v "" + } else { + set ns_queried $ns_or_glob + set out [nslist $ns_or_glob -types $types -nspathcommands $nspathcommands] + } + } else { + if {!$has_globchars} { + set nsnext [nsjoin $ns_current $ns_or_glob] + if {![tcl::namespace::exists $nsnext]} { + error "cannot change to namespace $ns_or_glob" + } + set ns_current $nsnext + set ns_queried $nsnext + set out [nslist [nsjoin $nsnext *] -types $types -nspathcommands $nspathcommands] + } else { + set ns_queried [nsjoin $ns_current $ns_or_glob] + set out [nslist [nsjoin $ns_current $ns_or_glob] -types $types -nspathcommands $nspathcommands] + } + } + } + set ns_display "\n$ns_queried" + if {$ns_current eq $ns_queried} { + if {$ns_current in [info commands $ns_current] } { + if {![catch [list tcl::namespace::ensemble configure $ns_current] ensemble_info]} { + if {[llength $ensemble_info] > 0} { + #this namespace happens to match ensemble command. + #todo - keep cache of encountered ensembles from commands.. and examine namespace in the configure info. + set ns_display "\n[a+ yellow bold]$ns_current (ensemble)[a+]" + } + } + } + } + append out $ns_display + return $out + + + } + + + #create possibly nested namespace structure - but only if not already existant + proc n/new {args} { + variable ns_current + if {![llength $args]} { + error "usage: :/new \[ ...\]" + } + set a1 [lindex $args 0] + set is_absolute [string match ::* $a1] + if {$is_absolute} { + set nspath [nsjoinall {*}$args] + } else { + if {[string match :* $a1]} { + puts stderr "n/new WARNING namespace with leading colon '$a1' is likely to have unexpected results" + } + set nspath [nsjoinall $ns_current {*}$args] + } + + set ns_exists [nseval [nsprefix $nspath] [list ::tcl::namespace::exists [nstail $nspath] ]] + + if {$ns_exists} { + error "Namespace $nspath already exists" + } + #tcl::namespace::eval [nsprefix $nspath] [list tcl::namespace::eval [nstail $nspath] {}] + nseval [nsprefix $nspath] [list ::tcl::namespace::eval [nstail $nspath] {}] + n/ $nspath + } + + + #nn/ ::/ nsup/ - back up one namespace level + proc nsup/ {v args} { + variable ns_current + if {$ns_current eq "::"} { + puts stderr "Already at global namespace '::'" + } else { + set out "" + set nsq [nsprefix $ns_current] + if {$v eq "/"} { + set out [get_nslist -match [nsjoin $nsq *] -types [list children]] + } else { + set out [get_nslist -match [nsjoin $nsq *] -types [list all]] + } + #set out [nslist [nsjoin $nsq *]] + set ns_current $nsq + append out "\n$ns_current" + return $out + } + } + + #todo - walk up each ns - testing for possibly weirdly named namespaces + #review - do we even need it. + proc nsexists {nspath} { + error "unimplemented" + } + + #recursive nseval - for introspection of weird namespace trees + #approx 10x slower than normal tcl::namespace::eval - but still only a few microseconds.. fine for repl introspection + proc nseval_script {location} { + set parts [nsparts $location] + if {[lindex $parts 0] eq ""} { + lset parts 0 :: + } + if {[lindex $parts end] eq ""} { + set parts [lrange $parts 0 end-1] + } + + set body "" + set i 0 + set tails [lrepeat [llength $parts] ""] + foreach ns $parts { + set cmdlist [list ::tcl::namespace::eval $ns] + set t "" + if {$i > 0} { + append body " " + } + append body $cmdlist + if {$i == ([llength $parts] -1)} { + append body "