# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2024 # # @@ Meta Begin # Application punk::fileline 999999.0a1.0 # Meta platform tcl # Meta license BSD # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::fileline 0 999999.0a1.0] #[copyright "2024"] #[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] #[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] #[require punk::fileline] #[keywords module text parse file] #[description] #[para] - # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) #[para]This is important for certain text files where examining the number of chars/bytes is important #[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. #[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. #[subsection Concepts] #[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. #[example_begin] # package require punk::fileline # package require fileutil # set rawdata [lb]fileutil::cat data.txt -translation binary[rb] # punk::fileline::class::textinfo create obj_data $rawdata # puts stdout [lb]obj_data linecount[rb] #[example_end] #[subsection Notes] #[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. #[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. #[para]No support for lone carriage-returns being interpreted as line-endings. #[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages needed by punk::fileline #[list_begin itemized] package require Tcl 8.6 package require punk::args #*** !doctools #[item] [package {Tcl 8.6}] #[item] [package {punk::args}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] [comment {- end dependencies list -}] #*** !doctools #[subsection {optional dependencies}] #[para] packages that add functionality but aren't strictly required #[list_begin itemized] #*** !doctools #[item] [package {punk::ansi}] #[para] - recommended for class::textinfo [method chunk_boundary_display] #[item] [package {punk::char}] #[para] - recommended for class::textinfo [method chunk_boundary_display] #[item] [package {overtype}] #[para] - recommended for class::textinfo [method chunk_boundary_display] #*** !doctools #[list_end] [comment {- end optional dependencies list -}] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # oo::class namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::fileline::class { namespace export * #*** !doctools #[subsection {Namespace punk::fileline::class}] #[para] class definitions if {[info commands [namespace current]::textinfo] eq ""} { #*** !doctools #[list_begin enumerated] # oo::class create interface_sample1 { # #*** !doctools # #[enum] CLASS [class interface_sample1] # #[list_begin definitions] # method test {arg1} { # #*** !doctools # #[call class::interface_sample1 [method test] [arg arg1]] # #[para] test method # puts "test: $arg1" # } # #*** !doctools # #[list_end] [comment {-- end definitions interface_sample1}] # } #uses zero based indexing. Caller can add 1 for line numbers oo::class create [namespace current]::textinfo { #*** !doctools #[enum] CLASS [class textinfo] #[list_begin definitions] # [para] [emph METHODS] variable o_chunk ;#current state variable o_chunkop_store variable o_lineop_store variable o_chunk_epoch variable o_line_epoch variable o_payloadlist variable o_linemap variable o_LF_C variable o_CRLF_C constructor {datachunk args} { #*** !doctools #[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] #[para] Constructor for textinfo object which represents a chunk or all of a file #[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: #[example_begin] # fconfigure $fd -translation binary # set chunkdata [lb]read $fd[rb]] #or # set chunkdata [lb]fileutil::cat -translation binary[rb] #[example_end] #[para] when loading the data namespace eval [namespace current] { set nspath [namespace path] foreach p [list ::punk::fileline ::punk::fileline::ansi] { if {$p ni $nspath} { lappend nspath $p } } namespace path $nspath } set o_chunk $datachunk set o_line_epoch [list] set o_chunk_epoch [list "fromchunkchange-at-[clock micros]"] set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message set defaults [dict create\ -substitutionmap {}\ -crlf_lf_placeholders $crlf_lf_placeholders\ -userid ""\ ] set known_opts [dict keys $defaults] foreach {k v} $args { if {$k ni $known_opts} { error "[self] constructor error: unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- set opt_substitutionmap [dict get $opts -substitutionmap] ;#review - can be done by caller - or a loadable -policy set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders] set opt_userid [dict get $opts -userid] # -- --- --- --- --- --- --- if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] !=1 || [string length [lindex $opt_crlf_lf_placeholders 1]] !=1} { error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars" } lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C if {[string first $o_LF_C $o_chunk] >=0} { set decval [scan $o_LF_C %c] if {$decval < 32 || $decval > 127} { set char_desc "(decimal value $decval)" } else { set char_desc "'$o_LF_C' (decimal value $decval)" } error "textinfo::constructor error: rawfiledata already contains linefeed substitution character $char_desc specified as first element of -crlf_lf_placeholders" } if {[string first $o_CRLF_C $o_chunk] >=0} { set decval [scan $o_CRLF_C %c] if {$decval < 32 || $decval > 127} { set char_desc "(decimal value $decval)" } else { set char_desc "'$o_CRLF_C' (decimal value $decval)" } error "textinfo::constructor error: rawfiledata already contains carriagereturn-linefeed substitution character $char_desc specified as second element of -crlf_lf_placeholders" } if {$o_LF_C eq $o_CRLF_C} { puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings" } my regenerate_lines } method chunk {chunkstart chunkend} { #*** !doctools #[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] #[para]Return a range of bytes from the underlying raw chunk data. #[para] e.g The following retrieves the entire chunk #[para] objName chunk 0 end return [string range $o_chunk $chunkstart $chunkend] } method chunklen {} { #*** !doctools #[call class::textinfo [method chunklen]] #[para] Number of bytes/characters in the raw data of the file return [string length $o_chunk] } method chunk_boundary_display {chunkstart chunkend chunksize args} { #*** !doctools #[call class::textinfo [method chunk_boundary_display]] #[para]Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend #[para]Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour set defaults [dict create\ -ansi $::punk::fileline::ansi::enabled\ -offset 0\ -displaybytes 200\ -truncatedmark "..."\ -completemark "---"\ -moremark " + "\ -continuemark " > "\ -linemaxwidth 100\ -linebase 0\ -limit -1\ -boundaries {}\ -showconfig 0\ -boundaryheader {Boundary %i% at %b%}\ ] set known_opts [dict keys $defaults] foreach {k v} $args { if {$k ni $known_opts} { error "[self]::chunk_boundary error: unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- --- --- set opt_ansi [dict get $opts -ansi] set opt_offset [dict get $opts -offset] set opt_displaybytes [dict get $opts -displaybytes] set opt_tmark [dict get $opts -truncatedmark] set opt_cmark [dict get $opts -completemark] set opt_linemax [dict get $opts -linemaxwidth] set opt_linebase [dict get $opts -linebase] set opt_linebase [string map [list _ ""] $opt_linebase] set opt_limit [dict get $opts -limit] ;#limit number of boundaries to display set opt_boundaries [dict get $opts -boundaries] ;#use pre-calculated boundaries if supplied set opt_showconfig [dict get $opts -showconfig] set opt_boundaryheader [dict get $opts -boundaryheader] # -- --- --- --- --- --- package require overtype # will require punk::char and punk::ansi if {"::punk::fileline::ansi::stripansi" ne [info commands ::punk::fileline::ansi::stripansi]} { namespace eval ::punk::fileline::ansi { namespace import ::punk::ansi::* } } #This mechanism for enabling/disabling ansi is a bit clumsy - prone to errors with regard to keeping in sync with any api changes in punk ansi #It's done here to allow this to be used without the full set of punk modules and/or shell - REVIEW #risk of failing to reset on error set pre_ansi_enabled $::punk::fileline::ansi::enabled if {$opt_ansi} { set ::punk::fileline::ansi::enabled 1 } else { set ::punk::fileline::ansi::enabled 0 } if {"::punk::fileline::stripansi" ne [info commands ::punk::fileline::stripansi]} { proc ::punk::fileline::a {args} { if {$::punk::fileline::ansi::enabled} { tailcall ::punk::fileline::ansi::a {*}$args } else { return "" } } proc ::punk::fileline::a+ {args} { if {$::punk::fileline::ansi::enabled} { tailcall ::punk::fileline::ansi::a+ {*}$args } else { return "" } } proc ::punk::fileline::stripansi {str} { if {$::punk::fileline::ansi::enabled} { tailcall ::punk::fileline::ansi::stripansi $str } else { return $str } } } set maxline [lindex [my chunkrange_to_linerange $chunkend $chunkend] 0] set minline [lindex [my chunkrange_to_linerange $chunkstart $chunkstart] 0] #suport simple end+-int (+-)start(+-)int to set linebase to line corresponding to chunkstart or chunkend #also simple int+int and int-int - nothing more complicated (similar to Tcl lrange etc in that regard) #commonly this will be something like -start or -end if {![string is integer -strict $opt_linebase]} { set sign "" set errunrecognised "unrecognised -linebase value '$opt_linebase'. Expected positive or negative integer or -start -start-int -start+int -end -end-int -end+int or -eof (where leading - is optional but probably desirable) " if {[string index $opt_linebase 0] eq "-"} { set sign - set tail [string range $opt_linebase 1 end] } else { set tail [string trimleft $opt_linebase +];#ignore + } if {[string match eof* $tail]} { set endmath [string range $tail 3 end] #todo endmath? if {$tail eq "eof"} { set lastline [lindex [my chunkrange_to_linerange end end] 0] set linebase ${sign}$lastline } else { error $errunrecognised } } elseif {[string match end* $tail]} { set endmath [string range $tail 3 end] if {[string length $endmath]} { set op [string index $endmath 0] if {$op in {+ -}} { set operand [string range $endmath 1 end] if {[string is integer -strict $operand]} { if {$op eq "+"} { set linebase [expr {$maxline + $operand}] } else { set linebase [expr {$maxline - $operand}] } } else { error $errunrecognised } } else { error $errunrecognised } } else { set linebase $maxline } set linebase ${sign}$linebase } elseif {[string match start* $tail]} { set endmath [string range $tail 5 end] if {[string length $endmath]} { set op [string index $endmath 0] if {$op in {+ -}} { set operand [string range $endmath 1 end] if {[string is integer -strict $operand]} { if {$op eq "+"} { set linebase [expr {$minline + $operand}] } else { set linebase [expr {$minline - $operand}] } } else { error $errunrecognised } } else { error $errunrecognised } } else { set linebase $minline } set linebase ${sign}$linebase } elseif {[string match *-* $tail]} { set extras [lassign [split $tail -] int1 int2] if {[llength $extras]} { error $errunrecognised } if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { error $errunrecognised } set linebase [expr {$int1 - $int2}] set linebase ${sign}$linebase } elseif {[string match *+* $tail]} { set extras [lassign [split $tail +] int1 int2] if {[llength $extras]} { error $errunrecognised } if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { error $errunrecognised } set linebase [expr {$int1 + $int2}] set linebase ${sign}$linebase } else { error $errunrecognised } } else { set linebase $opt_linebase } lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend if {![llength $opt_boundaries]} { set binfo [lib::range_spans_chunk_boundaries $chunkstart $chunkend $chunksize -offset $opt_offset] set boundaries [dict get $binfo boundaries] } else { set boundaries [list] foreach b $opt_boundaries { if {$chunkstart <= $b && $chunkend >= $b} { lappend boundaries [expr {$b + $opt_offset}] } } } if {![llength $boundaries]} { return "No boundaries found between $chunkstart and $chunkend for chunksize $chunksize (when offset $opt_offset)" } if {$opt_showconfig} { set result "chunk range $chunkstart $chunkend line range $minline $maxline linebase $linebase limit $opt_limit\n" } else { set result "" } set pre_bytes [expr {$opt_displaybytes /2}] set post_bytes $pre_bytes set max_bytes [expr {[my chunklen] -1}] if {$opt_limit > 0} { set boundaries [lrange $boundaries[unset boundaries] 0 $opt_limit-1] } set i 0 foreach b $boundaries { if {$opt_boundaryheader ne ""} { set j [expr {$i+1}] append result [string map [list %b% $b %i% $i %j% $j] $opt_boundaryheader] \n } set low [expr {max(($b - $pre_bytes),0)}] set high [expr {min(($b + $post_bytes),$max_bytes)}] set lineinfolist [my chunkrange_to_lineinfolist $low $high -show_truncated 1] set le_map [list \r\n \r \n ] set result_list [list] foreach lineinfo $lineinfolist { set lineidx [dict get $lineinfo lineindex] set linenum [expr {$lineidx + $linebase}] set s [dict get $lineinfo start] set e [dict get $lineinfo end] set boundarymarker "" set displayidx "" set linenum_display $linenum if {$s <= $b && $e >= $b} { set idx [expr {$b - $s}] ;#index into whole position in whole line - not so useful if we're viewing a small section of a line set char [string index [my line $lineidx] $idx] set char_display [string map [list \r \n ] $char] if {[dict get $lineinfo is_truncated]} { set tside [dict get $lineinfo truncatedside] set truncated [dict get $lineinfo truncated] set tlen [string length $truncated] if {"left" in $tside} { set tleft [dict get $lineinfo truncatedleft] set tleftlen [string length $tleft] set displayidx [expr {$idx - $tleftlen}] } elseif {"right" in $tside} { set displayidx $idx } } else { set displayidx $idx } set boundarymarker "'[a+ green bold]$char_display[a]'@$displayidx" set linenum_display ${linenum_display},$idx } set lhs_status $opt_cmark ;#default set rhs_status $opt_cmark ;#default if {[dict get $lineinfo is_truncated]} { set line [dict get $lineinfo truncated] set tside [dict get $lineinfo truncatedside] if {"left" in $tside && "right" in $tside } { set lhs_status $opt_tmark set rhs_status $opt_tmark } elseif {"left" in $tside} { set lhs_status $opt_tmark } elseif {"right" in $tside} { set rhs_status $opt_tmark } } else { set line [my line $lineidx] } if {$displayidx ne ""} { set line [string replace $line $displayidx $displayidx [a+ White green bold]$char_display[a]] } set displayline [string map $le_map $line] lappend result_list [list $linenum_display $boundarymarker $lhs_status $displayline $rhs_status] } set title_linenum "LNUM" set linenums [lsearch -index 0 -all -inline -subindices $result_list *] set markers [lsearch -index 1 -all -inline -subindices $result_list *] set lines [lsearch -index 3 -all -inline -subindices $result_list *] set title_marker "" set title_line "Line" #todo - use punk::char for unicode support of wide chars etc? set widest_linenum [tcl::mathfunc::max {*}[lmap v [concat [list $title_linenum] $linenums] {string length $v}]] set widest_marker [tcl::mathfunc::max {*}[lmap v [concat [list $title_marker] $markers] {string length [stripansi $v]}]] set widest_status [expr {max([string length $opt_cmark], [string length $opt_tmark])}] set widest_line [tcl::mathfunc::max {*}[lmap v [concat [list $title_line] $lines] {string length $v}]] foreach row $result_list { lassign $row linenum marker lhs_status line rhs_status append result [format " %-*s " $widest_linenum $linenum] append result [format " %-*s " $widest_marker $marker] append result [format " %-*s " $widest_status $lhs_status] append result [format " %-*s " $widest_line $line] append result [format " %-*s " $widest_status $rhs_status] \n } incr i } set ::punk::fileline::ansi::enabled $pre_ansi_enabled return $result } method linecount {} { #*** !doctools #[call class::textinfo [method linecount]] #[para] Number of lines in the raw data of the file, counted as per the policy in effect return [llength $o_payloadlist] } method line {lineindex} { #*** !doctools #[call class::textinfo [method line] [arg lineindex]] #[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata #[para]A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) #[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" #[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending lassign [my numeric_linerange $lineindex 0] lineindex set le [dict get $o_linemap $lineindex le] set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le] return [lindex $o_payloadlist $lineindex]$le_chars } method chunk_find_glob {globsearch args} { #todo - use linepayload_find_glob when -ignore_lineendings is 0 - but check truncations for 1st and last line error "unimplemented" } method linepayload_find_glob {globsearch args} { #*** !doctools #[call class::textinfo [method linepayload_find_glob] [arg globsearch] [opt {option value...}]] #[para]Return a lineinfolist (see [method lineinfo] and [method lineinfolist]) of lines where payload matches the [arg globsearch] string #[para]To limit the returned results use the -limit n option - where -limit 0 means return all matches. #[para]For example: [method linepayload_find_glob] "*test*" -limit 1 #[para]The result is always a list of lineinfo dictionaries even if one item is returned #[para] -limitfrom can be start|end #[para]The order of results is always the order as they occur in the data - even if -limitfrom end is specified. #[para]-limitfrom end means that only the last -limit items are returned #[para]Note that as glob accepts [lb]chars[rb]] to mean match any character in the set given by chars, searching for literal square brackets should be done by escaping the bracket with a backslash #[para]This is true even if only a single square bracket is being searched for. e.g {*[lb]file*} will not find the word file followed by a left square-bracket - even though the search didn't close the square brackets. #[para]In the above case - the literal search should be {*\[lb]file*} set defaults [dict create\ -limit 0\ -strategy 1\ -start 0\ -end end\ -limitfrom start\ ] set known_opts [dict keys $defaults] dict for {k v} $args { if {$k ni $known_opts} { error "linepayload_find_glob unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_limit [dict get $opts -limit] if {![string is integer -strict $opt_limit] || $opt_limit < 0} { error "linepayload_find_glob -limit must be positive integer" } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_strategy [dict get $opts -strategy] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_start [dict get $opts -start] set opt_start [expr {$opt_start}] if {$opt_start != 0} {error "-start unimplemented"} # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_end [dict get $opts -end] set max_line_index [expr {[llength $o_payloadlist]-1}] if {$opt_end eq "end"} { set opt_end $max_line_index } #TODO if {$opt_end < $max_line_index} {error "-end less than max_line_index unimplemented"} # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_limitfrom [dict get $opts -limitfrom] #-limitfrom start|end only #TODO if {$opt_limitfrom ne "start"} {error "-limitfrom unimplemented"} # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- set lineinfolist [list] if {$opt_limit == 1} { set idx [lsearch -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] if {$idx >=0} { set i [expr {$opt_start + $idx}] lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] } } elseif {$opt_limit == 0} { set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] foreach irel $indices { set i [expr {$opt_start + $irel}] lappend lineinfolist [list lineindex $i [dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] } } else { #todo - auto-strategy based on limit vs number of lines if {$opt_strategy == 0} { set posn 0 for {set r 0} {$r < $opt_limit} {incr r} { set n [lsearch [lrange $o_payloadlist $posn+$opt_start end] $globsearch] if {$n >=0} { set irel [expr {$posn + $n}] set i [expr {$irel + $opt_start}] lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] set posn [expr {$irel+1}] } } } else { set indices [lsearch -all -glob [lrange $o_payloadlist $opt_start $opt_end] $globsearch] set limited [lrange $indices 0 $opt_limit-1] foreach irel $limited { set i [expr {$opt_start + $irel}] lappend lineinfolist [list lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i]] } } } return $lineinfolist } method linepayload {lineindex} { #*** !doctools #[call class::textinfo [method linepayload] [arg lineindex]] #[para]Return the text of the line indicated by the zero-based lineindex #[para]The line-ending is not returned in the data - but is still stored against this lineindex #[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method #[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used #[para]To retrieve an entire line including line-ending use the [method line] method. lassign [my numeric_linerange $lineindex 0] lineindex return [lindex $o_payloadlist $lineindex] } method linepayloads {startindex endindex} { #*** !doctools #[call class::textinfo [method linepayloads] [arg startindex] [arg endindex]] #[para]Return a list of just the payloads in the specified linindex range, with no metadata. return [lrange $o_payloadlist $startindex $endindex] } method linemeta {lineindex} { #*** !doctools #[call class::textinfo [method linemeta] [arg lineindex]] #[para]Return a dict of the metadata for the line indicated by the zero-based lineindex #[para]Keys returned include #[list_begin itemized] #[item] le #[para] A string representing the type of line-ending: crlf|lf|none #[item] linelen #[para] The number of characters/bytes in the whole line including line-ending if any #[item] payloadlen #[para] The number of character/bytes in the line excluding line-ending #[item] start #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins #[item] end #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends #[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload #[list_end] lassign [my numeric_linerange $lineindex 0] lineindex dict get $o_linemap $lineindex } method lineinfo {lineindex} { #*** !doctools #[call class::textinfo [method lineinfo] [arg lineindex]] #[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex #[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending. #[para]The 'payload' value is the same as is returned from the [method linepayload] method. lassign [my numeric_linerange $lineindex 0] lineindex ;#convert lineindex to canonical number e.g 1_000 -> 1000 end -> highest index return [dict create lineindex $lineindex {*}[dict get $o_linemap $lineindex] payload [lindex $o_payloadlist $lineindex]] } method lineinfolist {startidx endidx} { #*** !doctools #[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]] #[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx lassign [my numeric_linerange $startidx $endidx] startidx endidx set chunkstart [dict get $o_linemap $startidx start] set chunkend [dict get $o_linemap $endidx end] set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assert - no need to view truncations as we've picked start and end of complete lines #verify sanity set l_start [lindex $line_list 0] if {[set idx_start [dict get $l_start lineindex]] ne $startidx} { error "lineinfolist first lineindex $idx_start doesn't match startidx $startidx" } set l_end [lindex $line_list end] if {[set idx_end [dict get $l_end lineindex]] ne $endidx} { error "lineinfolist last lineindex $idx_end doesn't match endidx $endidx" } return $line_list } method linerange_to_chunkrange {startidx endidx} { #*** !doctools #[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] lassign [my numeric_linerange $startidx $endidx] startidx endidx #inclusive range return [list [dict get $o_linemap $startidx start] [dict get $o_linemap $endidx end]] } method linerange_to_chunk {startidx endidx} { #*** !doctools #[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] set chunkrange [my linerange_to_chunkrange $startidx $endidx] return [string range $o_chunk [lindex $chunkrange 0] [lindex $chunkrange 1]] } method lines {startidx endidx} { #*** !doctools #[call class::textinfo [method lines] [arg startidx] [arg endidx]] lassign [my numeric_linerange $startidx $endidx] startidx endidx set linelist [list] set le_map [dict create lf \n crlf \r\n none ""] for {set i $startidx} {$i <= $endidx} {incr i} { lappend linelist "[lindex $o_payloadlist $i][dict get $le_map [dict get $o_linemap $i le]]" } return $linelist } method linepayloads {startidx endidx} { #*** !doctools #[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] return [lrange $o_payloadlist $startidx $endidx] } method chunkrange_to_linerange {chunkstart chunkend} { #*** !doctools #[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend set linestart -1 for {set i 0} {$i < [llength $o_payloadlist]} {incr i} { if {($chunkstart >= [dict get $o_linemap $i start]) && ($chunkstart <= [dict get $o_linemap $i end])} { set linestart $i break } } if {$linestart == -1} { error "Line with range in chunk spanning start index $chunkstart not found" } set lineend -1 for {set i [expr {[llength $o_payloadlist] -1}]} {$i >=0} {incr i -1} { if {($chunkend >= [dict get $o_linemap $i start]) && ($chunkend <= [dict get $o_linemap $i end])} { set lineend $i break } } if {$lineend == -1} { error "Line with range spanning end index $chunkend not found" } return [list $linestart $lineend] } method chunkrange_to_lineinfolist {chunkstart chunkend args} { #*** !doctools #[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] #[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied #[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. #[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) #[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend set defaults [dict create\ -show_truncated 0\ ] set known_opts [dict keys $defaults] foreach {k v} $args { if {$k ni $known_opts} { error "chunkrange_to_lines error: unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- set opt_show_truncated [dict get $opts -show_truncated] # -- --- --- --- --- --- --- --- set infolist [list] set linerange [my chunkrange_to_linerange $chunkstart $chunkend] lassign $linerange start_lineindex end_lineindex #if -show_truncated #return extra keys for first and last items (which may be the same item if chunkrange is entirely within a line) #add is_truncated 0|1 to all lines #Even if the start/end line is not fully within the chunkrange ie truncated - the 'payload' key will contain the original untruncated data ########################### # first line may have payload tail truncated - or just linefeed, or even a split linefeed ########################### set first [dict create lineindex $start_lineindex {*}[dict get $o_linemap $start_lineindex] payload [lindex $o_payloadlist $start_lineindex]] set start_info [dict get $o_linemap $start_lineindex] if {$chunkstart > [dict get $start_info start]} { dict set first is_truncated 1 dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line } else { dict set first is_truncated 0 } if {$opt_show_truncated} { #line1 if {$chunkstart > [dict get $start_info start]} { #there is lhs truncation set payload [lindex $o_payloadlist $start_lineindex] set line_start [dict get $start_info start] set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $start_info le]] set payload_and_le "${payload}${le_chars}" set split [expr {$chunkstart - $line_start}] set truncated [string range $payload_and_le $split end] set lhs [string range $payload_and_le 0 $split-1] dict set first truncated $truncated dict set first truncatedleft $lhs } } ########################### ########################### # middle lines if any - no truncation ########################### #difference in indexes of 1 would only mean 2 items to return set middle_list [list] if {($end_lineindex - $start_lineindex) > 1} { for {set i [expr {$start_lineindex +1}]} {$i <= [expr {$end_lineindex -1}] } {incr i} { #lineindex is key into main list lappend middle_list [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0] } } ########################### ########################### # tail line may have beginning or all of payload truncated - linefeed may be split if crlf # may be same line as first line - in which case truncation at beginning as well if {$end_lineindex == $start_lineindex} { #same record set end_info $start_info if {$chunkend < [dict get $end_info end]} { #there is rhs truncation if {[dict get $first is_truncated]} { dict set first truncatedside [list left right] } else { dict set first is_truncated 1 dict set first truncatedside [list right] } } if {$opt_show_truncated} { if {$chunkend < [dict get $end_info end]} { #there is rhs truncation and we need to return the splits #do rhs truncation - possibly in addition to existing lhs truncation # ... set payload [lindex $o_payloadlist $end_lineindex] set line_start [dict get $end_info start] set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]] set payload_and_le "${payload}${le_chars}" set split [expr {$chunkend - $line_start}] set truncated [string range $payload_and_le 0 $split] set rhs [string range $payload_and_le $split+1 end] dict set first truncatedright $rhs if {"left" ni [dict get $first truncatedside]} { #rhs truncation only puts "payload_and_le: $payload_and_le" puts "LENGTH: [string length $payload_and_le]" #--- #--- dict set first truncated $truncated dict set first truncatedside [list right] } else { #truncated on both sides set lhslen [string length [dict get $first truncatedleft]] #re-truncate the truncation to reapply the original lhs truncation set truncated [string range $truncated $lhslen end] dict set first truncated $truncated } } } #no middle or last to append lappend infolist $first } else { set last [dict create lineindex $end_lineindex {*}[dict get $o_linemap $end_lineindex] payload [lindex $o_payloadlist $end_lineindex]] set end_info [dict get $o_linemap $end_lineindex] if {$chunkend < [dict get $end_info end]} { dict set last is_truncated 1 dict set last truncatedside [list right] } else { dict set last is_truncated 0 } if {$opt_show_truncated} { if {$chunkend < [dict get $end_info end]} { #there is rhs truncation - and last line in range is a different line to first one set payload [lindex $o_payloadlist $end_lineindex] set line_start [dict get $end_info start] set line_end [dict get $end_info end] set le [dict get $end_info le] set le_size [dict get {lf 1 crlf 2 none 0} $le] set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le] set payload_and_le "${payload}${le_chars}" set split [expr {$chunkend - $line_start}] set truncated [string range $payload_and_le 0 $split] set rhs [string range $payload_and_le $split+1 end] dict set last truncated $truncated dict set last truncatedright $rhs #this has the effect that truncating the rhs by 1 can result in truncated being larger than original payload for crlf lines - as payload now sees the cr #this is a bit unintuitive - but probably best reflects the reality. The truncated value is the truncated 'line' rather than the truncated 'payload' } } lappend infolist $first if {[llength $middle_list]} { lappend infolist {*}$middle_list } lappend infolist $last } ########################### #assert all records have is_truncated key. #assert if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right #assert If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys. return $infolist } #need to check truncations so that any split \r\n is counted precisely todo method chunk_le_counts {chunkstart chunkend} { set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend -show_truncated 1] set lf_count 0 set crlf_count 0 set none_count 0 foreach d $infolines { set le [dict get $d le] if {$le eq "lf"} { incr lf_count } elseif {$le eq "crlf"} { incr crlf_count } else { incr none_count } } #even without split crlf - this can overcount by counting the lf or crlf in a line which had an ending not in the chunk range specified #check first and last infoline for truncations #Also check if the truncation is directly between an crlf #both an lhs split and an rhs split could land between cr and lf #to be precise - we should presumably count the part within our chunk as either a none for cr or an lf #This means a caller counting chunk by chunk using this method will sometimes get the wrong answer depending on where crlfs lie relative to their chosen chunk size #This is presumably ok - as it should be a well known thing to watch out for. #If we're only receiving chunk by chunk we can't reliably detect splits vs lone s in the data #There are surely more efficient ways for a caller to count line-endings in the way that makes sense for them #but we should makes things as easy as possible for users of this line/chunk structure anyway. set first [lindex $infolines 0] if {[dict get $first is_truncated]} { #could be the only line - and truncated at one or both ends. #both a left and a right truncation could split a crlf } set last [lindex $infolines end] if {[dict get $first lineindex] != [dict get $last lineindex]} { #only need to process last if it is a different line #if so - then split can only be left side } return [dict create lf $lf_count crlf $crlf_count unterminated $none_count warning line_ending_splits_unimplemented] } #todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk method append_chunk {rawchunk} { error "sorry - unimplemented" } method numeric_linerange {startidx endidx} { #*** !doctools #[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]] #[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data #[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices return [my normalize_indices $startidx $endidx [expr {[dict size $o_linemap]-1}]] } method numeric_chunkrange {startidx endidx} { #*** !doctools #[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]] #[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data return [my normalize_indices $startidx $endidx [expr {[string length $o_chunk]-1}]] } method normalize_indices {startidx endidx max} { #*** !doctools #[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]] #[para]A utility to convert some of the of Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max #[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted #[para]startidx higher than endidx is allowed #[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max set original_startidx $startidx set original_endidx $endidx set startidx [string map [list _ ""] $startidx] ;#don't barf on Tcl 8.7+ underscores in numbers - we can't just use expr because it will not handle end-x set endidx [string map [list _ ""] $endidx] if {![string is digit -strict "$startidx$endidx"]} { foreach whichvar [list start end] { upvar 0 ${whichvar}idx index if {![string is digit -strict $index]} { if {"end" eq $index} { set index $max } elseif {[string match "*-*" $index]} { #end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions lassign [split $index -] A B if {$A eq "end"} { set index [expr {$max - $B}] } else { set index [expr {$A - $B}] } } elseif {[string match "*+*" $index]} { lassign [split $index +] A B if {$A eq "end"} { #review - this will just result in out of bounds error in final test - as desired #By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all. set index [expr {$max + $B}] } else { set index [expr {$A + $B}] } } else { #May be something like +2 or -0 which braced expr can hanle #we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources. if {[catch {expr {$index}} index]} { #could be end+x - but we don't want out of bounds to be valid #set it to something that the final bounds expr test can deal with set index Inf } } } } } #Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices. #show the supplied index and what it was mapped to in the error message. if {$startidx < 0 || $startidx > $max} { error "Bad start index '$original_startidx'. $startidx out of bounds 0 - $max" } if {$endidx < 0 || $endidx > $max} { error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max (try $max or end)" } return [list $startidx $endidx] } method regenerate_lines {args} { #*** !doctools #[call class::textinfo [method regenerate_lines]] #[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex #[para]This is called automatically by the Constructor during object creation #[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered. #[para]TODO - review whether such manual control will be necessary/desirable #we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none # first split on lf - then crlf. As we've replaced with single substution chars - the order doesn't matter. set o_payloadlist [list] set o_linemap [dict create] set crlf_replace [list \r\n $o_CRLF_C \n $o_LF_C] set normalised_data [string map $crlf_replace $o_chunk] set lf_lines [split $normalised_data $o_LF_C] set idx 0 set lf_count 0 set crlf_count 0 set filedata_offset 0 set i 0 set imax [expr {[llength $lf_lines]-1}] foreach lfln $lf_lines { set crlf_parts [split $lfln $o_CRLF_C] if {[llength $crlf_parts] <= 1} { #no crlf set payloadlen [string length $lfln] set le_size 1 set le lf if {$i == $imax} { #no more lf segments - and no crlfs if {$payloadlen > 0} { #last line in split has chars - therefore there was no trailing line-ending set le_size 0 set le none } else { #empty space after last line-ending #not really a line - we get here from splitting on our lf-replacement char #An editor might display this pseudo-line with a line number - but we won't treat it as one here break } } lappend o_payloadlist $lfln set linelen [expr {$payloadlen + $le_size}] #we include line-ending in byte count for a line. dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] incr filedata_offset $linelen incr lf_count incr idx } else { foreach crlfpart [lrange $crlf_parts 0 end-1] { lappend o_payloadlist $crlfpart set payloadlen [string length $crlfpart] set linelen [expr {$payloadlen + 2}] dict set o_linemap $idx [list le crlf linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] incr filedata_offset $linelen incr crlf_count incr idx } set lfpart [lindex $crlf_parts end] set payloadlen [string length $lfpart] if {$i == $imax} { #no more lf segments - but we did find crlf in last (or perhaps only) lf line #last element in our split has no le if {$payloadlen > 0} { set le_size 0 set le none } else { #set le_size 2 #set le crlf break } } else { #more lf segments to come set le_size 1 set le lf } lappend o_payloadlist $lfpart set linelen [expr {$payloadlen + $le_size}] dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]] incr filedata_offset $linelen incr lf_count incr idx } incr i #incr filedata_offset ;#move up 1 so start entry for next line is greater than end entry for previous line } set le_count [expr {$lf_count + $crlf_count}] if {$le_count != [llength $o_payloadlist]} { puts stderr "fileline::class::textinfo warning. regenerate_lines lf_count: $lf_count + crlf_count: $crlf_count does not equal length of lines stored: [llength $o_payloadlist]" } } method regenerate_chunk {} { #o_payloadlist #o_linemap set oldsize [string length $o_chunk] set newchunk "" dict for {idx lineinfo} $o_linemap { set } return [list newsize [string length $newchunk] oldsize $oldsize] } #*** !doctools #[list_end] } #*** !doctools #[list_end] [comment {--- end class enumeration ---}] } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Base namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::fileline { namespace export * #variable xyz #*** !doctools #[subsection {Namespace punk::fileline}] #[para] Core API functions for punk::fileline #[list_begin definitions] proc get_textinfo {args} { set defaults { -file -default {} -type existingfile -translation -default binary } lassign [dict values [punk::args::opts_values $defaults $args -minvalues 0 -maxvalues 1]] opts values # -- --- --- --- set opt_file [dict get $opts -file] set opt_translation [dict get $opts -translation] # -- --- --- --- if {$opt_file ne ""} { set filename $opt_file set fd [open $filename r] fconfigure $fd -translation $opt_translation set datachunk [read $fd] close $fd if {[llength $values]} { puts stderr "Ignoring trailing argument [string length [lindex $values 0]] bytes. Not used when -file is specified" } } else { set datachunk [lindex $values 0] } set textobj [class::textinfo new $datachunk] set summary "" append summary "Bytes loaded : [$textobj chunklen]" \n append summary "Lines recognised : [$textobj linecount]" \n set leinfo [$textobj chunk_le_counts 0 end] append summary "crlf endings (windows) : [dict get $leinfo crlf]" \n append summary "lf endings (unix) : [dict get $leinfo lf]" \n append summary "unterminated lines : [dict get $leinfo unterminated]" \n puts stdout $summary return $textobj } proc file_boundary_display {filename startbyte endbyte chunksize args} { set fd [open $filename r] ;#use default error if file not readable fconfigure $fd -translation binary set rawfiledata [read $fd] close $fd set textobj [class::textinfo new $rawfiledata] set result [$textobj chunk_boundary_display $startbyte $endbyte $chunksize {*}$args] $textobj destroy return $result } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::fileline ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Secondary API namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::fileline::lib { namespace export * namespace path [namespace parent] #*** !doctools #[subsection {Namespace punk::fileline::lib}] #[para] Secondary functions that are part of the API #[list_begin definitions] proc range_spans_chunk_boundaries {start end chunksize args} { #*** !doctools #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. #[list_begin arguments] # [arg_def integer start] # [para] zero-based start index of range # [arg_def integer end] # [para] zero-based end index of range # [arg_def integer chunksize] # [para] Number of bytes/characters in chunk - must be positive and > 0 #[list_end] #[para]returns a dict with the keys is_span and boundaries #[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize #[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize #[para]e.g #[example_begin] # range_spans_chunk_boundaries 10 1750 512 # is_span 1 boundaries {512 1024 1536} #[example_end] #[para]The -offset option #[example_begin] # range_spans_chunk_boundaries 10 1750 512 -offset 2 # is_span 1 boundaries {514 1026 1538} #[example_end] #[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 if {[catch {package require Tcl 8.7}]} { #only one implementation available for older Tcl tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args } if {$chunksize < 1} { error "range_spans_chunk_boundaries chunksize must be >= 1" } if {(abs($end - $start) / $chunksize) < 75} { tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args } else { tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize {*}$args } } proc range_boundaries {start end chunksizes args} { lassign [punk::get_leading_opts_and_values {\ -offset 0\ } $args] _opts opts _vals remainingargs } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Internal] namespace eval punk::fileline::system { #*** !doctools #[subsection {Namespace punk::fileline::system}] #[para] Internal functions that are not part of the API #for 8.7+ using lseq #much faster when resultant boundary size is large (at least when offset 0) proc _range_spans_chunk_boundaries_lseq {start end chunksize args} { if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly set defaults [dict create\ -offset 0\ ] set known_opts [dict keys $defaults] foreach {k v} $args { if {$k ni $known_opts} { error "unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- set opt_offset [dict get $opts -offset] # -- --- --- --- set smod [expr {$start % $chunksize}] if {$smod != 0} { set start [expr {$start + ($chunksize - $smod)}] if {$start > $end} { return [list is_span 0 boundaries {}] } } set boundaries [lseq $start to $end $chunksize] #offset can be negative if {$opt_offset} { if {$opt_offset + [lindex $boundaries end] > $end || $opt_offset + [lindex $boundaries 0] < $start} { set overflow 1 } else { set overflow 0 } set boundaries [lmap v $boundaries[unset boundaries] {expr {$v + $opt_offset}}] if {$overflow} { #we don't know how many overflowed.. set inrange [list] foreach b $boundaries { if {$b >= $start && $b <= $end} { lappend inrange $b } } set boundaries $inrange } } return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries] } #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case) #gets very slow (comparitively) with large resultsets proc _range_spans_chunk_boundaries_tcl {start end chunksize args} { if {$chunksize < 1} {error "chunksize must be > 0"} ;#sanitycheck in case called directly set defaults [dict create\ -offset 0\ ] set known_opts [dict keys $defaults] foreach {k v} $args { if {$k ni $known_opts} { error "unknown option '$k'. Known options: $known_opts" } } set opts [dict merge $defaults $args] # -- --- --- --- set opt_offset [dict get $opts -offset] # -- --- --- --- set is_span 0 set smod [expr {$start % $chunksize}] if {$smod != 0} { set start [expr {$start + ($chunksize - $smod)}] } set boundaries [list] #we only need to pre-check the result-range for negative offsets - as our main loop stops before end? if {$opt_offset < 0} { #set btrack [expr {$start + $opt_offset}] ;#start back one to make sure we catch the first boundary set btrack $bstart set boff [expr {$btrack + $opt_offset}] ;#must be growing even if start and offset are negative - as chunksize is at least 1 while {$boff < $start} { incr btrack $chunksize set boff [expr {$btrack + $opt_offset}] } set bstart $btrack } else { set bstart $start } for {set b $bstart} {[set boff [expr {$b + $opt_offset}]] <= $end} {incr b $chunksize} { lappend boundaries $boff } return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries offset $opt_offset] } proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} { puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]" puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]" if {![catch {package require Tcl 8.7}]} { puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]" } } } namespace eval punk::fileline::ansi { #*** !doctools #[subsection {Namespace punk::fileline::ansi}] #[para]These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable #[para]See [package punk::ansi] for documentation #[list_begin definitions] variable enabled 1 #*** !doctools #[call [fun ansi::a]] #[call [fun ansi::a+]] #[call [fun ansi::stripansi]] #*** !doctools #[list_end] [comment {--- end definitions namespace punk::fileline::ansi ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::fileline [namespace eval punk::fileline { variable pkg punk::fileline variable version set version 999999.0a1.0 }] return #*** !doctools #[manpage_end]