You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1462 lines
78 KiB

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2024
#
# @@ Meta Begin
# Application punk::fileline 999999.0a1.0
# Meta platform tcl
# Meta license BSD
# @@ Meta End
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::fileline 0 999999.0a1.0]
#[copyright "2024"]
#[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}]
#[moddesc {punk fileline}] [comment {-- Description at end of page heading --}]
#[require punk::fileline]
#[keywords module text parse file]
#[description]
#[para] -
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Overview]
#[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed)
#[para]This is important for certain text files where examining the number of chars/bytes is important
#[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved.
#[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data.
#[subsection Concepts]
#[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation.
#[example_begin]
# package require punk::fileline
# package require fileutil
# set rawdata [lb]fileutil::cat data.txt -translation binary[rb]
# punk::fileline::class::textinfo create obj_data $rawdata
# puts stdout [lb]obj_data linecount[rb]
#[example_end]
#[subsection Notes]
#[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files.
#[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired.
#[para]No support for lone carriage-returns being interpreted as line-endings.
#[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[subsection dependencies]
#[para] packages needed by punk::fileline
#[list_begin itemized]
package require Tcl 8.6
package require punk::args
#*** !doctools
#[item] [package {Tcl 8.6}]
#[item] [package {punk::args}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end] [comment {- end dependencies list -}]
#*** !doctools
#[subsection {optional dependencies}]
#[para] packages that add functionality but aren't strictly required
#[list_begin itemized]
#*** !doctools
#[item] [package {punk::ansi}]
#[para] - recommended for class::textinfo [method chunk_boundary_display]
#[item] [package {punk::char}]
#[para] - recommended for class::textinfo [method chunk_boundary_display]
#[item] [package {overtype}]
#[para] - recommended for class::textinfo [method chunk_boundary_display]
#*** !doctools
#[list_end] [comment {- end optional dependencies list -}]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section API]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::fileline::class {
namespace export *
#*** !doctools
#[subsection {Namespace punk::fileline::class}]
#[para] class definitions
if {[info commands [namespace current]::textinfo] eq ""} {
#*** !doctools
#[list_begin enumerated]
# oo::class create interface_sample1 {
# #*** !doctools
# #[enum] CLASS [class interface_sample1]
# #[list_begin definitions]
# method test {arg1} {
# #*** !doctools
# #[call class::interface_sample1 [method test] [arg arg1]]
# #[para] test method
# puts "test: $arg1"
# }
# #*** !doctools
# #[list_end] [comment {-- end definitions interface_sample1}]
# }
#uses zero based indexing. Caller can add 1 for line numbers
oo::class create [namespace current]::textinfo {
#*** !doctools
#[enum] CLASS [class textinfo]
#[list_begin definitions]
# [para] [emph METHODS]
variable o_chunk ;#current state
variable o_chunkop_store
variable o_lineop_store
variable o_chunk_epoch
variable o_line_epoch
variable o_payloadlist
variable o_linemap
variable o_LF_C
variable o_CRLF_C
constructor {datachunk args} {
#*** !doctools
#[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]]
#[para] Constructor for textinfo object which represents a chunk or all of a file
#[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like:
#[example_begin]
# fconfigure $fd -translation binary
# set chunkdata [lb]read $fd[rb]]
#or
# set chunkdata [lb]fileutil::cat <filename> -translation binary[rb]
#[example_end]
#[para] when loading the data
namespace eval [namespace current] {
set nspath [namespace path]
foreach p [list ::punk::fileline ::punk::fileline::ansi] {
if {$p ni $nspath} {
lappend nspath $p
}
}
namespace path $nspath
}
set o_chunk $datachunk
set o_line_epoch [list]
set o_chunk_epoch [list "fromchunkchange-at-[clock micros]"]
set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message
set defaults [dict create\
-substitutionmap {}\
-crlf_lf_placeholders $crlf_lf_placeholders\
-userid "<pid-[pid]-tid-[thread::id]>"\
]
set known_opts [dict keys $defaults]
foreach {k v} $args {
if {$k ni $known_opts} {
error "[self] constructor error: unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- ---
set opt_substitutionmap [dict get $opts -substitutionmap] ;#review - can be done by caller - or a loadable -policy
set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders]
set opt_userid [dict get $opts -userid]
# -- --- --- --- --- --- ---
if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] !=1 || [string length [lindex $opt_crlf_lf_placeholders 1]] !=1} {
error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars"
}
lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C
if {[string first $o_LF_C $o_chunk] >=0} {
set decval [scan $o_LF_C %c]
if {$decval < 32 || $decval > 127} {
set char_desc "(decimal value $decval)"
} else {
set char_desc "'$o_LF_C' (decimal value $decval)"
}
error "textinfo::constructor error: rawfiledata already contains linefeed substitution character $char_desc specified as first element of -crlf_lf_placeholders"
}
if {[string first $o_CRLF_C $o_chunk] >=0} {
set decval [scan $o_CRLF_C %c]
if {$decval < 32 || $decval > 127} {
set char_desc "(decimal value $decval)"
} else {
set char_desc "'$o_CRLF_C' (decimal value $decval)"
}
error "textinfo::constructor error: rawfiledata already contains carriagereturn-linefeed substitution character $char_desc specified as second element of -crlf_lf_placeholders"
}
if {$o_LF_C eq $o_CRLF_C} {
puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings"
}
my regenerate_lines
}
method chunk {chunkstart chunkend} {
#*** !doctools
#[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]]
#[para]Return a range of bytes from the underlying raw chunk data.
#[para] e.g The following retrieves the entire chunk
#[para] objName chunk 0 end
return [string range $o_chunk $chunkstart $chunkend]
}
method chunklen {} {
#*** !doctools
#[call class::textinfo [method chunklen]]
#[para] Number of bytes/characters in the raw data of the file
return [string length $o_chunk]
}
method chunk_boundary_display {chunkstart chunkend chunksize args} {
#*** !doctools
#[call class::textinfo [method chunk_boundary_display]]
#[para]Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend
#[para]Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour
set defaults [dict create\
-ansi $::punk::fileline::ansi::enabled\
-offset 0\
-displaybytes 200\
-truncatedmark "..."\
-completemark "---"\
-moremark " + "\
-continuemark " > "\
-linemaxwidth 100\
-linebase 0\
-limit -1\
-boundaries {}\
-showconfig 0\
-boundaryheader {Boundary %i% at %b%}\
]
set known_opts [dict keys $defaults]
foreach {k v} $args {
if {$k ni $known_opts} {
error "[self]::chunk_boundary error: unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- --- --- ---
set opt_ansi [dict get $opts -ansi]
set opt_offset [dict get $opts -offset]
set opt_displaybytes [dict get $opts -displaybytes]
set opt_tmark [dict get $opts -truncatedmark]
set opt_cmark [dict get $opts -completemark]
set opt_linemax [dict get $opts -linemaxwidth]
set opt_linebase [dict get $opts -linebase]
set opt_linebase [string map [list _ ""] $opt_linebase]
set opt_limit [dict get $opts -limit] ;#limit number of boundaries to display
set opt_boundaries [dict get $opts -boundaries] ;#use pre-calculated boundaries if supplied
set opt_showconfig [dict get $opts -showconfig]
set opt_boundaryheader [dict get $opts -boundaryheader]
# -- --- --- --- --- ---
package require overtype
# will require punk::char and punk::ansi
if {"::punk::fileline::ansi::stripansi" ne [info commands ::punk::fileline::ansi::stripansi]} {
namespace eval ::punk::fileline::ansi {
namespace import ::punk::ansi::*
}
}
#This mechanism for enabling/disabling ansi is a bit clumsy - prone to errors with regard to keeping in sync with any api changes in punk ansi
#It's done here to allow this to be used without the full set of punk modules and/or shell - REVIEW
#risk of failing to reset on error
set pre_ansi_enabled $::punk::fileline::ansi::enabled
if {$opt_ansi} {
set ::punk::fileline::ansi::enabled 1
} else {
set ::punk::fileline::ansi::enabled 0
}
if {"::punk::fileline::stripansi" ne [info commands ::punk::fileline::stripansi]} {
proc ::punk::fileline::a {args} {
if {$::punk::fileline::ansi::enabled} {
tailcall ::punk::fileline::ansi::a {*}$args
} else {
return ""
}
}
proc ::punk::fileline::a+ {args} {
if {$::punk::fileline::ansi::enabled} {
tailcall ::punk::fileline::ansi::a+ {*}$args
} else {
return ""
}
}
proc ::punk::fileline::stripansi {str} {
if {$::punk::fileline::ansi::enabled} {
tailcall ::punk::fileline::ansi::stripansi $str
} else {
return $str
}
}
}
set maxline [lindex [my chunkrange_to_linerange $chunkend $chunkend] 0]
set minline [lindex [my chunkrange_to_linerange $chunkstart $chunkstart] 0]
#suport simple end+-int (+-)start(+-)int to set linebase to line corresponding to chunkstart or chunkend
#also simple int+int and int-int - nothing more complicated (similar to Tcl lrange etc in that regard)
#commonly this will be something like -start or -end
if {![string is integer -strict $opt_linebase]} {
set sign ""
set errunrecognised "unrecognised -linebase value '$opt_linebase'. Expected positive or negative integer or -start -start-int -start+int -end -end-int -end+int or -eof (where leading - is optional but probably desirable) "
if {[string index $opt_linebase 0] eq "-"} {
set sign -
set tail [string range $opt_linebase 1 end]
} else {
set tail [string trimleft $opt_linebase +];#ignore +
}
if {[string match eof* $tail]} {
set endmath [string range $tail 3 end]
#todo endmath?
if {$tail eq "eof"} {
set lastline [lindex [my chunkrange_to_linerange end end] 0]
set linebase ${sign}$lastline
} else {
error $errunrecognised
}
} elseif {[string match end* $tail]} {
set endmath [string range $tail 3 end]
if {[string length $endmath]} {
set op [string index $endmath 0]
if {$op in {+ -}} {
set operand [string range $endmath 1 end]
if {[string is integer -strict $operand]} {
if {$op eq "+"} {
set linebase [expr {$maxline + $operand}]
} else {
set linebase [expr {$maxline - $operand}]
}
} else {
error $errunrecognised
}
} else {
error $errunrecognised
}
} else {
set linebase $maxline
}
set linebase ${sign}$linebase
} elseif {[string match start* $tail]} {
set endmath [string range $tail 5 end]
if {[string length $endmath]} {
set op [string index $endmath 0]
if {$op in {+ -}} {
set operand [string range $endmath 1 end]
if {[string is integer -strict $operand]} {
if {$op eq "+"} {
set linebase [expr {$minline + $operand}]
} else {
set linebase [expr {$minline - $operand}]
}
} else {
error $errunrecognised
}
} else {
error $errunrecognised
}
} else {
set linebase $minline
}
set linebase ${sign}$linebase
} elseif {[string match *-* $tail]} {
set extras [lassign [split $tail -] int1 int2]
if {[llength $extras]} {
error $errunrecognised
}
if {![string is integer -strict $int1] || ![string is integer -strict $int2]} {
error $errunrecognised
}
set linebase [expr {$int1 - $int2}]
set linebase ${sign}$linebase
} elseif {[string match *+* $tail]} {
set extras [lassign [split $tail +] int1 int2]
if {[llength $extras]} {
error $errunrecognised
}
if {![string is integer -strict $int1] || ![string is integer -strict $int2]} {
error $errunrecognised
}
set linebase [expr {$int1 + $int2}]
set linebase ${sign}$linebase
} else {
error $errunrecognised
}
} else {
set linebase $opt_linebase
}
lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
if {![llength $opt_boundaries]} {
set binfo [lib::range_spans_chunk_boundaries $chunkstart $chunkend $chunksize -offset $opt_offset]
set boundaries [dict get $binfo boundaries]
} else {
set boundaries [list]
foreach b $opt_boundaries {
if {$chunkstart <= $b && $chunkend >= $b} {
lappend boundaries [expr {$b + $opt_offset}]
}
}
}
if {![llength $boundaries]} {
return "No boundaries found between $chunkstart and $chunkend for chunksize $chunksize (when offset $opt_offset)"
}
if {$opt_showconfig} {
set result "chunk range $chunkstart $chunkend line range $minline $maxline linebase $linebase limit $opt_limit\n"
} else {
set result ""
}
set pre_bytes [expr {$opt_displaybytes /2}]
set post_bytes $pre_bytes
set max_bytes [expr {[my chunklen] -1}]
if {$opt_limit > 0} {
set boundaries [lrange $boundaries[unset boundaries] 0 $opt_limit-1]
}
set i 0
foreach b $boundaries {
if {$opt_boundaryheader ne ""} {
set j [expr {$i+1}]
append result [string map [list %b% $b %i% $i %j% $j] $opt_boundaryheader] \n
}
set low [expr {max(($b - $pre_bytes),0)}]
set high [expr {min(($b + $post_bytes),$max_bytes)}]
set lineinfolist [my chunkrange_to_lineinfolist $low $high -show_truncated 1]
set le_map [list \r\n <crlf> \r <cr> \n <lf>]
set result_list [list]
foreach lineinfo $lineinfolist {
set lineidx [dict get $lineinfo lineindex]
set linenum [expr {$lineidx + $linebase}]
set s [dict get $lineinfo start]
set e [dict get $lineinfo end]
set boundarymarker ""
set displayidx ""
set linenum_display $linenum
if {$s <= $b && $e >= $b} {
set idx [expr {$b - $s}] ;#index into whole position in whole line - not so useful if we're viewing a small section of a line
set char [string index [my line $lineidx] $idx]
set char_display [string map [list \r <CR> \n <LF>] $char]
if {[dict get $lineinfo is_truncated]} {
set tside [dict get $lineinfo truncatedside]
set truncated [dict get $lineinfo truncated]
set tlen [string length $truncated]
if {"left" in $tside} {
set tleft [dict get $lineinfo truncatedleft]
set tleftlen [string length $tleft]
set displayidx [expr {$idx - $tleftlen}]
} elseif {"right" in $tside} {
set displayidx $idx
}
} else {
set displayidx $idx
}
set boundarymarker "'[a+ green bold]$char_display[a]'@$displayidx"
set linenum_display ${linenum_display},$idx
}
set lhs_status $opt_cmark ;#default
set rhs_status $opt_cmark ;#default
if {[dict get $lineinfo is_truncated]} {
set line [dict get $lineinfo truncated]
set tside [dict get $lineinfo truncatedside]
if {"left" in $tside && "right" in $tside } {
set lhs_status $opt_tmark
set rhs_status $opt_tmark
} elseif {"left" in $tside} {
set lhs_status $opt_tmark
} elseif {"right" in $tside} {
set rhs_status $opt_tmark
}
} else {
set line [my line $lineidx]
}
if {$displayidx ne ""} {
set line [string replace $line $displayidx $displayidx [a+ White green bold]$char_display[a]]
}
set displayline [string map $le_map $line]
lappend result_list [list $linenum_display $boundarymarker $lhs_status $displayline $rhs_status]
}
set title_linenum "LNUM"
set linenums [lsearch -index 0 -all -inline -subindices $result_list *]
set markers [lsearch -index 1 -all -inline -subindices $result_list *]
set lines [lsearch -index 3 -all -inline -subindices $result_list *]
set title_marker ""
set title_line "Line"
#todo - use punk::char for unicode support of wide chars etc?
set widest_linenum [tcl::mathfunc::max {*}[lmap v [concat [list $title_linenum] $linenums] {string length $v}]]
set widest_marker [tcl::mathfunc::max {*}[lmap v [concat [list $title_marker] $markers] {string length [stripansi $v]}]]
set widest_status [expr {max([string length $opt_cmark], [string length $opt_tmark])}]
set widest_line [tcl::mathfunc::max {*}[lmap v [concat [list $title_line] $lines] {string length $v}]]
foreach row $result_list {
lassign $row linenum marker lhs_status line rhs_status
append result [format " %-*s " $widest_linenum $linenum]
append result [format " %-*s " $widest_marker $marker]
append result [format " %-*s " $widest_status $lhs_status]
append result [format " %-*s " $widest_line $line]
append result [format " %-*s " $widest_status $rhs_status] \n
}
incr i
}
set ::punk::fileline::ansi::enabled $pre_ansi_enabled
return $result
}
method linecount {} {
#*** !doctools
#[call class::textinfo [method linecount]]
#[para] Number of lines in the raw data of the file, counted as per the policy in effect
return [llength $o_payloadlist]
}
method line {lineindex} {
#*** !doctools
#[call class::textinfo [method line] [arg lineindex]]
#[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata
#[para]A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)
#[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"
#[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending
lassign [my numeric_linerange $lineindex 0] lineindex
set le [dict get $o_linemap $lineindex le]
set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le]
return [lindex $o_payloadlist $lineindex]$le_chars
}
method chunk_find_glob {globsearch args} {
#todo - use linepayload_find_glob when -ignore_lineendings is 0 - but check truncations for 1st and last line
error "unimplemented"
}
method linepayload_find_glob {globsearch args} {
#*** !doctools
#[call class::textinfo [method linepayload_find_glob] [arg globsearch] [opt {option value...}]]
#[para]Return a lineinfolist (see [method lineinfo] and [method lineinfolist]) of lines where payload matches the [arg globsearch] string
#[para]To limit the returned results use the -limit n option - where -limit 0 means return all matches.
#[para]For example: [method linepayload_find_glob] "*test*" -limit 1
#[para]The result is always a list of lineinfo dictionaries even if one item is returned
#[para] -limitfrom can be start|end
#[para]The order of results is always the order as they occur in the data - even if -limitfrom end is specified.
#[para]-limitfrom end means that only the last -limit items are returned
#[para]Note that as glob accepts [lb]chars[rb]] to mean match any character in the set given by chars, searching for literal square brackets should be done by escaping the bracket with a backslash
#[para]This is true even if only a single square bracket is being searched for. e.g {*[lb]file*} will not find the word file followed by a left square-bracket - even though the search didn't close the square brackets.
#[para]In the above case - the literal search should be {*\[lb]file*}
set defaults [dict create\
-limit 0\
-strategy 1\
-start 0\
-end end\
-limitfrom start\
]
set known_opts [dict keys $defaults]
dict for {k v} $args {
if {$k ni $known_opts} {
error "linepayload_find_glob unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_limit [dict get $opts -limit]
if {![string is integer -strict $opt_limit] || $opt_limit < 0} {
error "linepayload_find_glob -limit must be positive integer"
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_strategy [dict get $opts -strategy]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_start [dict get $opts -start]
set opt_start [expr {$opt_start}]
if {$opt_start != 0} {error "-start unimplemented"}
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_end [dict get $opts -end]
set max_line_index [expr {[llength $o_payloadlist]-1}]
if {$opt_end eq "end"} {
set opt_end $max_line_index
}
#TODO
if {$opt_end < $max_line_index} {error "-end less than max_line_index unimplemented"}
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_limitfrom [dict get $opts -limitfrom]
#-limitfrom start|end only
#TODO
if {$opt_limitfrom ne "start"} {error "-limitfrom unimplemented"}
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set lineinfolist [list]
if {$opt_limit == 1} {
set idx [lsearch -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch]
if {$idx >=0} {
set i [expr {$opt_start + $idx}]
lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]]
}
} elseif {$opt_limit == 0} {
set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch]
foreach irel $indices {
set i [expr {$opt_start + $irel}]
lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]]
}
} else {
#todo - auto-strategy based on limit vs number of lines
if {$opt_strategy == 0} {
set posn 0
for {set r 0} {$r < $opt_limit} {incr r} {
set n [lsearch [lrange $o_payloadlist $posn+$opt_start end] $globsearch]
if {$n >=0} {
set irel [expr {$posn + $n}]
set i [expr {$irel + $opt_start}]
lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]]
set posn [expr {$irel+1}]
}
}
} else {
set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch]
set limited [lrange $indices 0 $opt_limit-1]
foreach irel $limited {
set i [expr {$opt_start + $irel}]
lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]]
}
}
}
return $lineinfolist
}
method linepayload {lineindex} {
#*** !doctools
#[call class::textinfo [method linepayload] [arg lineindex]]
#[para]Return the text of the line indicated by the zero-based lineindex
#[para]The line-ending is not returned in the data - but is still stored against this lineindex
#[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method
#[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used
#[para]To retrieve an entire line including line-ending use the [method line] method.
lassign [my numeric_linerange $lineindex 0] lineindex
return [lindex $o_payloadlist $lineindex]
}
method linepayloads {startindex endindex} {
#*** !doctools
#[call class::textinfo [method linepayloads] [arg startindex] [arg endindex]]
#[para]Return a list of just the payloads in the specified linindex range, with no metadata.
return [lrange $o_payloadlist $startindex $endindex]
}
method linemeta {lineindex} {
#*** !doctools
#[call class::textinfo [method linemeta] [arg lineindex]]
#[para]Return a dict of the metadata for the line indicated by the zero-based lineindex
#[para]Keys returned include
#[list_begin itemized]
#[item] le
#[para] A string representing the type of line-ending: crlf|lf|none
#[item] linelen
#[para] The number of characters/bytes in the whole line including line-ending if any
#[item] payloadlen
#[para] The number of character/bytes in the line excluding line-ending
#[item] start
#[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins
#[item] end
#[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends
#[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload
#[list_end]
lassign [my numeric_linerange $lineindex 0] lineindex
dict get $o_linemap $lineindex
}
method lineinfo {lineindex} {
#*** !doctools
#[call class::textinfo [method lineinfo] [arg lineindex]]
#[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex
#[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending.
#[para]The 'payload' value is the same as is returned from the [method linepayload] method.
lassign [my numeric_linerange $lineindex 0] lineindex ;#convert lineindex to canonical number e.g 1_000 -> 1000 end -> highest index
return [dict create lineindex $lineindex {*}[dict get $o_linemap $lineindex] payload [lindex $o_payloadlist $lineindex]]
}
method lineinfolist {startidx endidx} {
#*** !doctools
#[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]]
#[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx
lassign [my numeric_linerange $startidx $endidx] startidx endidx
set chunkstart [dict get $o_linemap $startidx start]
set chunkend [dict get $o_linemap $endidx end]
set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assert - no need to view truncations as we've picked start and end of complete lines
#verify sanity
set l_start [lindex $line_list 0]
if {[set idx_start [dict get $l_start lineindex]] ne $startidx} {
error "lineinfolist first lineindex $idx_start doesn't match startidx $startidx"
}
set l_end [lindex $line_list end]
if {[set idx_end [dict get $l_end lineindex]] ne $endidx} {
error "lineinfolist last lineindex $idx_end doesn't match endidx $endidx"
}
return $line_list
}
method linerange_to_chunkrange {startidx endidx} {
#*** !doctools
#[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]]
lassign [my numeric_linerange $startidx $endidx] startidx endidx
#inclusive range
return [list [dict get $o_linemap $startidx start] [dict get $o_linemap $endidx end]]
}
method linerange_to_chunk {startidx endidx} {
#*** !doctools
#[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]]
set chunkrange [my linerange_to_chunkrange $startidx $endidx]
return [string range $o_chunk [lindex $chunkrange 0] [lindex $chunkrange 1]]
}
method lines {startidx endidx} {
#*** !doctools
#[call class::textinfo [method lines] [arg startidx] [arg endidx]]
lassign [my numeric_linerange $startidx $endidx] startidx endidx
set linelist [list]
set le_map [dict create lf \n crlf \r\n none ""]
for {set i $startidx} {$i <= $endidx} {incr i} {
lappend linelist "[lindex $o_payloadlist $i][dict get $le_map [dict get $o_linemap $i le]]"
}
return $linelist
}
method linepayloads {startidx endidx} {
#*** !doctools
#[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]]
return [lrange $o_payloadlist $startidx $endidx]
}
method chunkrange_to_linerange {chunkstart chunkend} {
#*** !doctools
#[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]]
lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
set linestart -1
for {set i 0} {$i < [llength $o_payloadlist]} {incr i} {
if {($chunkstart >= [dict get $o_linemap $i start]) && ($chunkstart <= [dict get $o_linemap $i end])} {
set linestart $i
break
}
}
if {$linestart == -1} {
error "Line with range in chunk spanning start index $chunkstart not found"
}
set lineend -1
for {set i [expr {[llength $o_payloadlist] -1}]} {$i >=0} {incr i -1} {
if {($chunkend >= [dict get $o_linemap $i start]) && ($chunkend <= [dict get $o_linemap $i end])} {
set lineend $i
break
}
}
if {$lineend == -1} {
error "Line with range spanning end index $chunkend not found"
}
return [list $linestart $lineend]
}
method chunkrange_to_lineinfolist {chunkstart chunkend args} {
#*** !doctools
#[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]]
#[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied
#[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.
#[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)
#[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk.
lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
set defaults [dict create\
-show_truncated 0\
]
set known_opts [dict keys $defaults]
foreach {k v} $args {
if {$k ni $known_opts} {
error "chunkrange_to_lines error: unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- ---
set opt_show_truncated [dict get $opts -show_truncated]
# -- --- --- --- --- --- --- ---
set infolist [list]
set linerange [my chunkrange_to_linerange $chunkstart $chunkend]
lassign $linerange start_lineindex end_lineindex
#if -show_truncated
#return extra keys for first and last items (which may be the same item if chunkrange is entirely within a line)
#add is_truncated 0|1 to all lines
#Even if the start/end line is not fully within the chunkrange ie truncated - the 'payload' key will contain the original untruncated data
###########################
# first line may have payload tail truncated - or just linefeed, or even a split linefeed
###########################
set first [dict create lineindex $start_lineindex {*}[dict get $o_linemap $start_lineindex] payload [lindex $o_payloadlist $start_lineindex]]
set start_info [dict get $o_linemap $start_lineindex]
if {$chunkstart > [dict get $start_info start]} {
dict set first is_truncated 1
dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line
} else {
dict set first is_truncated 0
}
if {$opt_show_truncated} {
#line1
if {$chunkstart > [dict get $start_info start]} {
#there is lhs truncation
set payload [lindex $o_payloadlist $start_lineindex]
set line_start [dict get $start_info start]
set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $start_info le]]
set payload_and_le "${payload}${le_chars}"
set split [expr {$chunkstart - $line_start}]
set truncated [string range $payload_and_le $split end]
set lhs [string range $payload_and_le 0 $split-1]
dict set first truncated $truncated
dict set first truncatedleft $lhs
}
}
###########################
###########################
# middle lines if any - no truncation
###########################
#difference in indexes of 1 would only mean 2 items to return
set middle_list [list]
if {($end_lineindex - $start_lineindex) > 1} {
for {set i [expr {$start_lineindex +1}]} {$i <= [expr {$end_lineindex -1}] } {incr i} {
#lineindex is key into main list
lappend middle_list [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0]
}
}
###########################
###########################
# tail line may have beginning or all of payload truncated - linefeed may be split if crlf
# may be same line as first line - in which case truncation at beginning as well
if {$end_lineindex == $start_lineindex} {
#same record
set end_info $start_info
if {$chunkend < [dict get $end_info end]} {
#there is rhs truncation
if {[dict get $first is_truncated]} {
dict set first truncatedside [list left right]
} else {
dict set first is_truncated 1
dict set first truncatedside [list right]
}
}
if {$opt_show_truncated} {
if {$chunkend < [dict get $end_info end]} {
#there is rhs truncation and we need to return the splits
#do rhs truncation - possibly in addition to existing lhs truncation
# ...
set payload [lindex $o_payloadlist $end_lineindex]
set line_start [dict get $end_info start]
set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]]
set payload_and_le "${payload}${le_chars}"
set split [expr {$chunkend - $line_start}]
set truncated [string range $payload_and_le 0 $split]
set rhs [string range $payload_and_le $split+1 end]
dict set first truncatedright $rhs
if {"left" ni [dict get $first truncatedside]} {
#rhs truncation only
puts "payload_and_le: $payload_and_le"
puts "LENGTH: [string length $payload_and_le]"
#---
#---
dict set first truncated $truncated
dict set first truncatedside [list right]
} else {
#truncated on both sides
set lhslen [string length [dict get $first truncatedleft]]
#re-truncate the truncation to reapply the original lhs truncation
set truncated [string range $truncated $lhslen end]
dict set first truncated $truncated
}
}
}
#no middle or last to append
lappend infolist $first
} else {
set last [dict create lineindex $end_lineindex {*}[dict get $o_linemap $end_lineindex] payload [lindex $o_payloadlist $end_lineindex]]
set end_info [dict get $o_linemap $end_lineindex]
if {$chunkend < [dict get $end_info end]} {
dict set last is_truncated 1
dict set last truncatedside [list right]
} else {
dict set last is_truncated 0
}
if {$opt_show_truncated} {
if {$chunkend < [dict get $end_info end]} {
#there is rhs truncation - and last line in range is a different line to first one
set payload [lindex $o_payloadlist $end_lineindex]
set line_start [dict get $end_info start]
set line_end [dict get $end_info end]
set le [dict get $end_info le]
set le_size [dict get {lf 1 crlf 2 none 0} $le]
set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le]
set payload_and_le "${payload}${le_chars}"
set split [expr {$chunkend - $line_start}]
set truncated [string range $payload_and_le 0 $split]
set rhs [string range $payload_and_le $split+1 end]
dict set last truncated $truncated
dict set last truncatedright $rhs
#this has the effect that truncating the rhs by 1 can result in truncated being larger than original payload for crlf lines - as payload now sees the cr
#this is a bit unintuitive - but probably best reflects the reality. The truncated value is the truncated 'line' rather than the truncated 'payload'
}
}
lappend infolist $first
if {[llength $middle_list]} {
lappend infolist {*}$middle_list
}
lappend infolist $last
}
###########################
#assert all records have is_truncated key.
#assert if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right
#assert If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys.
return $infolist
}
#need to check truncations so that any split \r\n is counted precisely todo
method chunk_le_counts {chunkstart chunkend} {
set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend -show_truncated 1]
set lf_count 0
set crlf_count 0
set none_count 0
foreach d $infolines {
set le [dict get $d le]
if {$le eq "lf"} {
incr lf_count
} elseif {$le eq "crlf"} {
incr crlf_count
} else {
incr none_count
}
}
#even without split crlf - this can overcount by counting the lf or crlf in a line which had an ending not in the chunk range specified
#check first and last infoline for truncations
#Also check if the truncation is directly between an crlf
#both an lhs split and an rhs split could land between cr and lf
#to be precise - we should presumably count the part within our chunk as either a none for cr or an lf
#This means a caller counting chunk by chunk using this method will sometimes get the wrong answer depending on where crlfs lie relative to their chosen chunk size
#This is presumably ok - as it should be a well known thing to watch out for.
#If we're only receiving chunk by chunk we can't reliably detect splits vs lone <cr>s in the data
#There are surely more efficient ways for a caller to count line-endings in the way that makes sense for them
#but we should makes things as easy as possible for users of this line/chunk structure anyway.
set first [lindex $infolines 0]
if {[dict get $first is_truncated]} {
#could be the only line - and truncated at one or both ends.
#both a left and a right truncation could split a crlf
}
set last [lindex $infolines end]
if {[dict get $first lineindex] != [dict get $last lineindex]} {
#only need to process last if it is a different line
#if so - then split can only be left side
}
return [dict create lf $lf_count crlf $crlf_count unterminated $none_count warning line_ending_splits_unimplemented]
}
#todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk
method append_chunk {rawchunk} {
error "sorry - unimplemented"
}
method numeric_linerange {startidx endidx} {
#*** !doctools
#[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]]
#[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data
#[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices
return [my normalize_indices $startidx $endidx [expr {[dict size $o_linemap]-1}]]
}
method numeric_chunkrange {startidx endidx} {
#*** !doctools
#[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]]
#[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data
return [my normalize_indices $startidx $endidx [expr {[string length $o_chunk]-1}]]
}
method normalize_indices {startidx endidx max} {
#*** !doctools
#[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]]
#[para]A utility to convert some of the of Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max
#[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted
#[para]startidx higher than endidx is allowed
#[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max
set original_startidx $startidx
set original_endidx $endidx
set startidx [string map [list _ ""] $startidx] ;#don't barf on Tcl 8.7+ underscores in numbers - we can't just use expr because it will not handle end-x
set endidx [string map [list _ ""] $endidx]
if {![string is digit -strict "$startidx$endidx"]} {
foreach whichvar [list start end] {
upvar 0 ${whichvar}idx index
if {![string is digit -strict $index]} {
if {"end" eq $index} {
set index $max
} elseif {[string match "*-*" $index]} {
#end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions
lassign [split $index -] A B
if {$A eq "end"} {
set index [expr {$max - $B}]
} else {
set index [expr {$A - $B}]
}
} elseif {[string match "*+*" $index]} {
lassign [split $index +] A B
if {$A eq "end"} {
#review - this will just result in out of bounds error in final test - as desired
#By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all.
set index [expr {$max + $B}]
} else {
set index [expr {$A + $B}]
}
} else {
#May be something like +2 or -0 which braced expr can hanle
#we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
if {[catch {expr {$index}} index]} {
#could be end+x - but we don't want out of bounds to be valid
#set it to something that the final bounds expr test can deal with
set index Inf
}
}
}
}
}
#Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices.
#show the supplied index and what it was mapped to in the error message.
if {$startidx < 0 || $startidx > $max} {
error "Bad start index '$original_startidx'. $startidx out of bounds 0 - $max"
}
if {$endidx < 0 || $endidx > $max} {
error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max (try $max or end)"
}
return [list $startidx $endidx]
}
method regenerate_lines {args} {
#*** !doctools
#[call class::textinfo [method regenerate_lines]]
#[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex
#[para]This is called automatically by the Constructor during object creation
#[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered.
#[para]TODO - review whether such manual control will be necessary/desirable
#we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none
# first split on lf - then crlf. As we've replaced with single substution chars - the order doesn't matter.
set o_payloadlist [list]
set o_linemap [dict create]
set crlf_replace [list \r\n $o_CRLF_C \n $o_LF_C]
set normalised_data [string map $crlf_replace $o_chunk]
set lf_lines [split $normalised_data $o_LF_C]
set idx 0
set lf_count 0
set crlf_count 0
set filedata_offset 0
set i 0
set imax [expr {[llength $lf_lines]-1}]
foreach lfln $lf_lines {
set crlf_parts [split $lfln $o_CRLF_C]
if {[llength $crlf_parts] <= 1} {
#no crlf
set payloadlen [string length $lfln]
set le_size 1
set le lf
if {$i == $imax} {
#no more lf segments - and no crlfs
if {$payloadlen > 0} {
#last line in split has chars - therefore there was no trailing line-ending
set le_size 0
set le none
} else {
#empty space after last line-ending
#not really a line - we get here from splitting on our lf-replacement char
#An editor might display this pseudo-line with a line number - but we won't treat it as one here
break
}
}
lappend o_payloadlist $lfln
set linelen [expr {$payloadlen + $le_size}]
#we include line-ending in byte count for a line.
dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
incr filedata_offset $linelen
incr lf_count
incr idx
} else {
foreach crlfpart [lrange $crlf_parts 0 end-1] {
lappend o_payloadlist $crlfpart
set payloadlen [string length $crlfpart]
set linelen [expr {$payloadlen + 2}]
dict set o_linemap $idx [list le crlf linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
incr filedata_offset $linelen
incr crlf_count
incr idx
}
set lfpart [lindex $crlf_parts end]
set payloadlen [string length $lfpart]
if {$i == $imax} {
#no more lf segments - but we did find crlf in last (or perhaps only) lf line
#last element in our split has no le
if {$payloadlen > 0} {
set le_size 0
set le none
} else {
#set le_size 2
#set le crlf
break
}
} else {
#more lf segments to come
set le_size 1
set le lf
}
lappend o_payloadlist $lfpart
set linelen [expr {$payloadlen + $le_size}]
dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
incr filedata_offset $linelen
incr lf_count
incr idx
}
incr i
#incr filedata_offset ;#move up 1 so start entry for next line is greater than end entry for previous line
}
set le_count [expr {$lf_count + $crlf_count}]
if {$le_count != [llength $o_payloadlist]} {
puts stderr "fileline::class::textinfo warning. regenerate_lines lf_count: $lf_count + crlf_count: $crlf_count does not equal length of lines stored: [llength $o_payloadlist]"
}
}
method regenerate_chunk {} {
#o_payloadlist
#o_linemap
set oldsize [string length $o_chunk]
set newchunk ""
dict for {idx lineinfo} $o_linemap {
set
}
return [list newsize [string length $newchunk] oldsize $oldsize]
}
#*** !doctools
#[list_end]
}
#*** !doctools
#[list_end] [comment {--- end class enumeration ---}]
}
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::fileline {
namespace export *
#variable xyz
#*** !doctools
#[subsection {Namespace punk::fileline}]
#[para] Core API functions for punk::fileline
#[list_begin definitions]
proc get_textinfo {args} {
set defaults {
-file -default {} -type existingfile
-translation -default binary
}
lassign [dict values [punk::args::opts_values $defaults $args -minvalues 0 -maxvalues 1]] opts values
# -- --- --- ---
set opt_file [dict get $opts -file]
set opt_translation [dict get $opts -translation]
# -- --- --- ---
if {$opt_file ne ""} {
set filename $opt_file
set fd [open $filename r]
fconfigure $fd -translation $opt_translation
set datachunk [read $fd]
close $fd
if {[llength $values]} {
puts stderr "Ignoring trailing argument [string length [lindex $values 0]] bytes. Not used when -file is specified"
}
} else {
set datachunk [lindex $values 0]
}
set textobj [class::textinfo new $datachunk]
set summary ""
append summary "Bytes loaded : [$textobj chunklen]" \n
append summary "Lines recognised : [$textobj linecount]" \n
set leinfo [$textobj chunk_le_counts 0 end]
append summary "crlf endings (windows) : [dict get $leinfo crlf]" \n
append summary "lf endings (unix) : [dict get $leinfo lf]" \n
append summary "unterminated lines : [dict get $leinfo unterminated]" \n
puts stdout $summary
return $textobj
}
proc file_boundary_display {filename startbyte endbyte chunksize args} {
set fd [open $filename r] ;#use default error if file not readable
fconfigure $fd -translation binary
set rawfiledata [read $fd]
close $fd
set textobj [class::textinfo new $rawfiledata]
set result [$textobj chunk_boundary_display $startbyte $endbyte $chunksize {*}$args]
$textobj destroy
return $result
}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::fileline ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Secondary API namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::fileline::lib {
namespace export *
namespace path [namespace parent]
#*** !doctools
#[subsection {Namespace punk::fileline::lib}]
#[para] Secondary functions that are part of the API
#[list_begin definitions]
proc range_spans_chunk_boundaries {start end chunksize args} {
#*** !doctools
#[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]]
#[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.
#[list_begin arguments]
# [arg_def integer start]
# [para] zero-based start index of range
# [arg_def integer end]
# [para] zero-based end index of range
# [arg_def integer chunksize]
# [para] Number of bytes/characters in chunk - must be positive and > 0
#[list_end]
#[para]returns a dict with the keys is_span and boundaries
#[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize
#[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize
#[para]e.g
#[example_begin]
# range_spans_chunk_boundaries 10 1750 512
# is_span 1 boundaries {512 1024 1536}
#[example_end]
#[para]The -offset <int> option
#[example_begin]
# range_spans_chunk_boundaries 10 1750 512 -offset 2
# is_span 1 boundaries {514 1026 1538}
#[example_end]
#[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75
if {[catch {package require Tcl 8.7}]} {
#only one implementation available for older Tcl
tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args
}
if {$chunksize < 1} {
error "range_spans_chunk_boundaries chunksize must be >= 1"
}
if {(abs($end - $start) / $chunksize) < 75} {
tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args
} else {
tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize {*}$args
}
}
proc range_boundaries {start end chunksizes args} {
lassign [punk::get_leading_opts_and_values {\
-offset 0\
} $args] _opts opts _vals remainingargs
}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Internal]
namespace eval punk::fileline::system {
#*** !doctools
#[subsection {Namespace punk::fileline::system}]
#[para] Internal functions that are not part of the API
#for 8.7+ using lseq
#much faster when resultant boundary size is large (at least when offset 0)
proc _range_spans_chunk_boundaries_lseq {start end chunksize args} {
if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly
set defaults [dict create\
-offset 0\
]
set known_opts [dict keys $defaults]
foreach {k v} $args {
if {$k ni $known_opts} {
error "unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- ---
set opt_offset [dict get $opts -offset]
# -- --- --- ---
set smod [expr {$start % $chunksize}]
if {$smod != 0} {
set start [expr {$start + ($chunksize - $smod)}]
if {$start > $end} {
return [list is_span 0 boundaries {}]
}
}
set boundaries [lseq $start to $end $chunksize]
#offset can be negative
if {$opt_offset} {
if {$opt_offset + [lindex $boundaries end] > $end || $opt_offset + [lindex $boundaries 0] < $start} {
set overflow 1
} else {
set overflow 0
}
set boundaries [lmap v $boundaries[unset boundaries] {expr {$v + $opt_offset}}]
if {$overflow} {
#we don't know how many overflowed..
set inrange [list]
foreach b $boundaries {
if {$b >= $start && $b <= $end} {
lappend inrange $b
}
}
set boundaries $inrange
}
}
return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
}
#faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case)
#gets very slow (comparitively) with large resultsets
proc _range_spans_chunk_boundaries_tcl {start end chunksize args} {
if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly
set defaults [dict create\
-offset 0\
]
set known_opts [dict keys $defaults]
foreach {k v} $args {
if {$k ni $known_opts} {
error "unknown option '$k'. Known options: $known_opts"
}
}
set opts [dict merge $defaults $args]
# -- --- --- ---
set opt_offset [dict get $opts -offset]
# -- --- --- ---
set is_span 0
set smod [expr {$start % $chunksize}]
if {$smod != 0} {
set start [expr {$start + ($chunksize - $smod)}]
}
set boundaries [list]
#we only need to pre-check the result-range for negative offsets - as our main loop stops before end?
if {$opt_offset < 0} {
#set btrack [expr {$start + $opt_offset}] ;#start back one to make sure we catch the first boundary
set btrack $bstart
set boff [expr {$btrack + $opt_offset}] ;#must be growing even if start and offset are negative - as chunksize is at least 1
while {$boff < $start} {
incr btrack $chunksize
set boff [expr {$btrack + $opt_offset}]
}
set bstart $btrack
} else {
set bstart $start
}
for {set b $bstart} {[set boff [expr {$b + $opt_offset}]] <= $end} {incr b $chunksize} {
lappend boundaries $boff
}
return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries offset $opt_offset]
}
proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} {
puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]"
puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]"
if {![catch {package require Tcl 8.7}]} {
puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]"
}
}
}
namespace eval punk::fileline::ansi {
#*** !doctools
#[subsection {Namespace punk::fileline::ansi}]
#[para]These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable
#[para]See [package punk::ansi] for documentation
#[list_begin definitions]
variable enabled 1
#*** !doctools
#[call [fun ansi::a]]
#[call [fun ansi::a+]]
#[call [fun ansi::stripansi]]
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::fileline::ansi ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide punk::fileline [namespace eval punk::fileline {
variable pkg punk::fileline
variable version
set version 999999.0a1.0
}]
return
#*** !doctools
#[manpage_end]