Julian Noble
11 months ago
32 changed files with 2676 additions and 149 deletions
@ -0,0 +1,146 @@ |
|||||||
|
[comment {--- punk::docgen generated from inline doctools comments ---}] |
||||||
|
[comment {--- punk::docgen DO NOT EDIT DOCS HERE UNLESS YOU REMOVE THESE COMMENT LINES ---}] |
||||||
|
[comment {--- punk::docgen overwrites this file ---}] |
||||||
|
[manpage_begin punkshell_module_punk::fileline 0 0.1.0] |
||||||
|
[copyright "2024"] |
||||||
|
[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] |
||||||
|
[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] |
||||||
|
[require punk::fileline] |
||||||
|
[keywords module text parse file] |
||||||
|
[description] |
||||||
|
[para] - |
||||||
|
[section Overview] |
||||||
|
[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||||
|
[para]This is important for certain text files where examining the number of chars/bytes is important |
||||||
|
[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. |
||||||
|
[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. |
||||||
|
[subsection Concepts] |
||||||
|
[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. |
||||||
|
[example_begin] |
||||||
|
package require punk::fileline |
||||||
|
package require fileutil |
||||||
|
set rawdata [lb]fileutil::cat data.txt -translation binary[rb] |
||||||
|
punk::fileline::class::textinfo create obj_data $rawdata |
||||||
|
puts stdout [lb]obj_data linecount[rb] |
||||||
|
[example_end] |
||||||
|
[subsection Notes] |
||||||
|
[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. |
||||||
|
[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. |
||||||
|
[para]No support for lone carriage-returns being interpreted as line-endings. |
||||||
|
[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. |
||||||
|
[subsection dependencies] |
||||||
|
[para] packages used by punk::fileline |
||||||
|
[list_begin itemized] |
||||||
|
[item] [package {Tcl 8.6}] |
||||||
|
[list_end] |
||||||
|
[section API] |
||||||
|
[subsection {Namespace punk::fileline::class}] |
||||||
|
[para] class definitions |
||||||
|
[list_begin enumerated] |
||||||
|
[enum] CLASS [class textinfo] |
||||||
|
[list_begin definitions] |
||||||
|
[para] [emph METHODS] |
||||||
|
[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] |
||||||
|
[para] Constructor for textinfo object which represents a chunk or all of a file |
||||||
|
[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: |
||||||
|
[example_begin] |
||||||
|
fconfigure $fd -translation binary |
||||||
|
set chunkdata [lb]read $fd[rb] |
||||||
|
or |
||||||
|
set chunkdata [lb]fileutil::cat <filename> -translation binary[rb] |
||||||
|
[example_end] |
||||||
|
[para] when loading the data |
||||||
|
[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] |
||||||
|
[para]Return a range of bytes from the underlying raw chunk data. |
||||||
|
[para] e.g The following retrieves the entire chunk |
||||||
|
[para] objName chunk 0 end |
||||||
|
[call class::textinfo [method chunklen]] |
||||||
|
[para] Number of bytes/characters in the raw data of the file |
||||||
|
[call class::textinfo [method linecount]] |
||||||
|
[para] Number of lines in the raw data of the file, counted as per the policy in effect |
||||||
|
[call class::textinfo [method regenerate_lines]] |
||||||
|
[para]generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex |
||||||
|
[call class::textinfo [method line] [arg lineindex]] |
||||||
|
[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata |
||||||
|
[para]A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) |
||||||
|
[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" |
||||||
|
[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending |
||||||
|
[call class::textinfo [method linepayload] [arg lineindex]] |
||||||
|
[para]Return the text of the line indicated by the zero-based lineindex |
||||||
|
[para]The line-ending is not returned in the data - but is still stored against this lineindex |
||||||
|
[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method |
||||||
|
[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used |
||||||
|
[para]To retrieve an entire line including line-ending use the [method line] method. |
||||||
|
[call class::textinfo [method linemeta] [arg lineindex]] |
||||||
|
[para]Return a dict of the metadata for the line indicated by the zero-based lineindex |
||||||
|
[para]Keys returned include |
||||||
|
[list_begin itemized] |
||||||
|
[item] le |
||||||
|
[para] A string representing the type of line-ending: crlf|lf|none |
||||||
|
[item] linelen |
||||||
|
[para] The number of characters/bytes in the whole line including line-ending if any |
||||||
|
[item] payloadlen |
||||||
|
[para] The number of characters/bytes in the line excluding line-ending |
||||||
|
[item] start |
||||||
|
[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins |
||||||
|
[item] end |
||||||
|
[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends |
||||||
|
[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload |
||||||
|
[list_end] |
||||||
|
[call class::textinfo [method lineinfo] [arg lineindex]] |
||||||
|
[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex |
||||||
|
[para]This returns the same info as the [method linemeta] method with an added key of 'payload' which is the text of the line without line-ending. |
||||||
|
[para]The 'payload' value is the same as is returned from the [method linepayload] method. |
||||||
|
[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] |
||||||
|
[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] |
||||||
|
[call class::textinfo [method lines] [arg startidx] [arg endidx]] |
||||||
|
[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] |
||||||
|
[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] |
||||||
|
[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] |
||||||
|
[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied |
||||||
|
[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. |
||||||
|
[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) |
||||||
|
[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. |
||||||
|
[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]] |
||||||
|
[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data |
||||||
|
[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices |
||||||
|
[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]] |
||||||
|
[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data |
||||||
|
[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]] |
||||||
|
[para]A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max |
||||||
|
[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted |
||||||
|
[para]startidx higher than endidx is allowed |
||||||
|
[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max |
||||||
|
[list_end] |
||||||
|
[list_end] [comment {--- end class enumeration ---}] |
||||||
|
[subsection {Namespace punk::fileline}] |
||||||
|
[para] Core API functions for punk::fileline |
||||||
|
[list_begin definitions] |
||||||
|
[list_end] [comment {--- end definitions namespace punk::fileline ---}] |
||||||
|
[subsection {Namespace punk::fileline::lib}] |
||||||
|
[para] Secondary functions that are part of the API |
||||||
|
[list_begin definitions] |
||||||
|
[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] |
||||||
|
[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. |
||||||
|
[list_begin arguments] |
||||||
|
[arg_def integer start] |
||||||
|
[para] zero-based start index of range |
||||||
|
[arg_def integer end] |
||||||
|
[para] zero-based end index of range |
||||||
|
[arg_def integer chunksize] |
||||||
|
[para] Number of bytes/characters in chunk |
||||||
|
[list_end] |
||||||
|
[para]returns a dict with the keys is_span and boundaries |
||||||
|
[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||||
|
[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize |
||||||
|
[para]e.g |
||||||
|
[example_begin] |
||||||
|
range_spans_chunk_boundaries 10 1750 512 |
||||||
|
is_span 1 boundaries {512 1024 1536} |
||||||
|
[example_end] |
||||||
|
[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 |
||||||
|
[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}] |
||||||
|
[section Internal] |
||||||
|
[subsection {Namespace punk::fileline::system}] |
||||||
|
[para] Internal functions that are not part of the API |
||||||
|
[manpage_end] |
@ -0,0 +1,559 @@ |
|||||||
|
'\" |
||||||
|
'\" Generated from file '_module_fileline-0\&.1\&.0\&.tm\&.man' by tcllib/doctools with format 'nroff' |
||||||
|
'\" Copyright (c) 2024 |
||||||
|
'\" |
||||||
|
.TH "punkshell_module_punk::fileline" 0 0\&.1\&.0 doc "punk fileline" |
||||||
|
.\" The -*- nroff -*- definitions below are for supplemental macros used |
||||||
|
.\" in Tcl/Tk manual entries. |
||||||
|
.\" |
||||||
|
.\" .AP type name in/out ?indent? |
||||||
|
.\" Start paragraph describing an argument to a library procedure. |
||||||
|
.\" type is type of argument (int, etc.), in/out is either "in", "out", |
||||||
|
.\" or "in/out" to describe whether procedure reads or modifies arg, |
||||||
|
.\" and indent is equivalent to second arg of .IP (shouldn't ever be |
||||||
|
.\" needed; use .AS below instead) |
||||||
|
.\" |
||||||
|
.\" .AS ?type? ?name? |
||||||
|
.\" Give maximum sizes of arguments for setting tab stops. Type and |
||||||
|
.\" name are examples of largest possible arguments that will be passed |
||||||
|
.\" to .AP later. If args are omitted, default tab stops are used. |
||||||
|
.\" |
||||||
|
.\" .BS |
||||||
|
.\" Start box enclosure. From here until next .BE, everything will be |
||||||
|
.\" enclosed in one large box. |
||||||
|
.\" |
||||||
|
.\" .BE |
||||||
|
.\" End of box enclosure. |
||||||
|
.\" |
||||||
|
.\" .CS |
||||||
|
.\" Begin code excerpt. |
||||||
|
.\" |
||||||
|
.\" .CE |
||||||
|
.\" End code excerpt. |
||||||
|
.\" |
||||||
|
.\" .VS ?version? ?br? |
||||||
|
.\" Begin vertical sidebar, for use in marking newly-changed parts |
||||||
|
.\" of man pages. The first argument is ignored and used for recording |
||||||
|
.\" the version when the .VS was added, so that the sidebars can be |
||||||
|
.\" found and removed when they reach a certain age. If another argument |
||||||
|
.\" is present, then a line break is forced before starting the sidebar. |
||||||
|
.\" |
||||||
|
.\" .VE |
||||||
|
.\" End of vertical sidebar. |
||||||
|
.\" |
||||||
|
.\" .DS |
||||||
|
.\" Begin an indented unfilled display. |
||||||
|
.\" |
||||||
|
.\" .DE |
||||||
|
.\" End of indented unfilled display. |
||||||
|
.\" |
||||||
|
.\" .SO ?manpage? |
||||||
|
.\" Start of list of standard options for a Tk widget. The manpage |
||||||
|
.\" argument defines where to look up the standard options; if |
||||||
|
.\" omitted, defaults to "options". The options follow on successive |
||||||
|
.\" lines, in three columns separated by tabs. |
||||||
|
.\" |
||||||
|
.\" .SE |
||||||
|
.\" End of list of standard options for a Tk widget. |
||||||
|
.\" |
||||||
|
.\" .OP cmdName dbName dbClass |
||||||
|
.\" Start of description of a specific option. cmdName gives the |
||||||
|
.\" option's name as specified in the class command, dbName gives |
||||||
|
.\" the option's name in the option database, and dbClass gives |
||||||
|
.\" the option's class in the option database. |
||||||
|
.\" |
||||||
|
.\" .UL arg1 arg2 |
||||||
|
.\" Print arg1 underlined, then print arg2 normally. |
||||||
|
.\" |
||||||
|
.\" .QW arg1 ?arg2? |
||||||
|
.\" Print arg1 in quotes, then arg2 normally (for trailing punctuation). |
||||||
|
.\" |
||||||
|
.\" .PQ arg1 ?arg2? |
||||||
|
.\" Print an open parenthesis, arg1 in quotes, then arg2 normally |
||||||
|
.\" (for trailing punctuation) and then a closing parenthesis. |
||||||
|
.\" |
||||||
|
.\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. |
||||||
|
.if t .wh -1.3i ^B |
||||||
|
.nr ^l \n(.l |
||||||
|
.ad b |
||||||
|
.\" # Start an argument description |
||||||
|
.de AP |
||||||
|
.ie !"\\$4"" .TP \\$4 |
||||||
|
.el \{\ |
||||||
|
. ie !"\\$2"" .TP \\n()Cu |
||||||
|
. el .TP 15 |
||||||
|
.\} |
||||||
|
.ta \\n()Au \\n()Bu |
||||||
|
.ie !"\\$3"" \{\ |
||||||
|
\&\\$1 \\fI\\$2\\fP (\\$3) |
||||||
|
.\".b |
||||||
|
.\} |
||||||
|
.el \{\ |
||||||
|
.br |
||||||
|
.ie !"\\$2"" \{\ |
||||||
|
\&\\$1 \\fI\\$2\\fP |
||||||
|
.\} |
||||||
|
.el \{\ |
||||||
|
\&\\fI\\$1\\fP |
||||||
|
.\} |
||||||
|
.\} |
||||||
|
.. |
||||||
|
.\" # define tabbing values for .AP |
||||||
|
.de AS |
||||||
|
.nr )A 10n |
||||||
|
.if !"\\$1"" .nr )A \\w'\\$1'u+3n |
||||||
|
.nr )B \\n()Au+15n |
||||||
|
.\" |
||||||
|
.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n |
||||||
|
.nr )C \\n()Bu+\\w'(in/out)'u+2n |
||||||
|
.. |
||||||
|
.AS Tcl_Interp Tcl_CreateInterp in/out |
||||||
|
.\" # BS - start boxed text |
||||||
|
.\" # ^y = starting y location |
||||||
|
.\" # ^b = 1 |
||||||
|
.de BS |
||||||
|
.br |
||||||
|
.mk ^y |
||||||
|
.nr ^b 1u |
||||||
|
.if n .nf |
||||||
|
.if n .ti 0 |
||||||
|
.if n \l'\\n(.lu\(ul' |
||||||
|
.if n .fi |
||||||
|
.. |
||||||
|
.\" # BE - end boxed text (draw box now) |
||||||
|
.de BE |
||||||
|
.nf |
||||||
|
.ti 0 |
||||||
|
.mk ^t |
||||||
|
.ie n \l'\\n(^lu\(ul' |
||||||
|
.el \{\ |
||||||
|
.\" Draw four-sided box normally, but don't draw top of |
||||||
|
.\" box if the box started on an earlier page. |
||||||
|
.ie !\\n(^b-1 \{\ |
||||||
|
\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
||||||
|
.\} |
||||||
|
.el \}\ |
||||||
|
\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' |
||||||
|
.\} |
||||||
|
.\} |
||||||
|
.fi |
||||||
|
.br |
||||||
|
.nr ^b 0 |
||||||
|
.. |
||||||
|
.\" # VS - start vertical sidebar |
||||||
|
.\" # ^Y = starting y location |
||||||
|
.\" # ^v = 1 (for troff; for nroff this doesn't matter) |
||||||
|
.de VS |
||||||
|
.if !"\\$2"" .br |
||||||
|
.mk ^Y |
||||||
|
.ie n 'mc \s12\(br\s0 |
||||||
|
.el .nr ^v 1u |
||||||
|
.. |
||||||
|
.\" # VE - end of vertical sidebar |
||||||
|
.de VE |
||||||
|
.ie n 'mc |
||||||
|
.el \{\ |
||||||
|
.ev 2 |
||||||
|
.nf |
||||||
|
.ti 0 |
||||||
|
.mk ^t |
||||||
|
\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' |
||||||
|
.sp -1 |
||||||
|
.fi |
||||||
|
.ev |
||||||
|
.\} |
||||||
|
.nr ^v 0 |
||||||
|
.. |
||||||
|
.\" # Special macro to handle page bottom: finish off current |
||||||
|
.\" # box/sidebar if in box/sidebar mode, then invoked standard |
||||||
|
.\" # page bottom macro. |
||||||
|
.de ^B |
||||||
|
.ev 2 |
||||||
|
'ti 0 |
||||||
|
'nf |
||||||
|
.mk ^t |
||||||
|
.if \\n(^b \{\ |
||||||
|
.\" Draw three-sided box if this is the box's first page, |
||||||
|
.\" draw two sides but no top otherwise. |
||||||
|
.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
||||||
|
.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c |
||||||
|
.\} |
||||||
|
.if \\n(^v \{\ |
||||||
|
.nr ^x \\n(^tu+1v-\\n(^Yu |
||||||
|
\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c |
||||||
|
.\} |
||||||
|
.bp |
||||||
|
'fi |
||||||
|
.ev |
||||||
|
.if \\n(^b \{\ |
||||||
|
.mk ^y |
||||||
|
.nr ^b 2 |
||||||
|
.\} |
||||||
|
.if \\n(^v \{\ |
||||||
|
.mk ^Y |
||||||
|
.\} |
||||||
|
.. |
||||||
|
.\" # DS - begin display |
||||||
|
.de DS |
||||||
|
.RS |
||||||
|
.nf |
||||||
|
.sp |
||||||
|
.. |
||||||
|
.\" # DE - end display |
||||||
|
.de DE |
||||||
|
.fi |
||||||
|
.RE |
||||||
|
.sp |
||||||
|
.. |
||||||
|
.\" # SO - start of list of standard options |
||||||
|
.de SO |
||||||
|
'ie '\\$1'' .ds So \\fBoptions\\fR |
||||||
|
'el .ds So \\fB\\$1\\fR |
||||||
|
.SH "STANDARD OPTIONS" |
||||||
|
.LP |
||||||
|
.nf |
||||||
|
.ta 5.5c 11c |
||||||
|
.ft B |
||||||
|
.. |
||||||
|
.\" # SE - end of list of standard options |
||||||
|
.de SE |
||||||
|
.fi |
||||||
|
.ft R |
||||||
|
.LP |
||||||
|
See the \\*(So manual entry for details on the standard options. |
||||||
|
.. |
||||||
|
.\" # OP - start of full description for a single option |
||||||
|
.de OP |
||||||
|
.LP |
||||||
|
.nf |
||||||
|
.ta 4c |
||||||
|
Command-Line Name: \\fB\\$1\\fR |
||||||
|
Database Name: \\fB\\$2\\fR |
||||||
|
Database Class: \\fB\\$3\\fR |
||||||
|
.fi |
||||||
|
.IP |
||||||
|
.. |
||||||
|
.\" # CS - begin code excerpt |
||||||
|
.de CS |
||||||
|
.RS |
||||||
|
.nf |
||||||
|
.ta .25i .5i .75i 1i |
||||||
|
.. |
||||||
|
.\" # CE - end code excerpt |
||||||
|
.de CE |
||||||
|
.fi |
||||||
|
.RE |
||||||
|
.. |
||||||
|
.\" # UL - underline word |
||||||
|
.de UL |
||||||
|
\\$1\l'|0\(ul'\\$2 |
||||||
|
.. |
||||||
|
.\" # QW - apply quotation marks to word |
||||||
|
.de QW |
||||||
|
.ie '\\*(lq'"' ``\\$1''\\$2 |
||||||
|
.\"" fix emacs highlighting |
||||||
|
.el \\*(lq\\$1\\*(rq\\$2 |
||||||
|
.. |
||||||
|
.\" # PQ - apply parens and quotation marks to word |
||||||
|
.de PQ |
||||||
|
.ie '\\*(lq'"' (``\\$1''\\$2)\\$3 |
||||||
|
.\"" fix emacs highlighting |
||||||
|
.el (\\*(lq\\$1\\*(rq\\$2)\\$3 |
||||||
|
.. |
||||||
|
.\" # QR - quoted range |
||||||
|
.de QR |
||||||
|
.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3 |
||||||
|
.\"" fix emacs highlighting |
||||||
|
.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3 |
||||||
|
.. |
||||||
|
.\" # MT - "empty" string |
||||||
|
.de MT |
||||||
|
.QW "" |
||||||
|
.. |
||||||
|
.BS |
||||||
|
.SH NAME |
||||||
|
punkshell_module_punk::fileline \- file line-handling utilities |
||||||
|
.SH SYNOPSIS |
||||||
|
package require \fBpunk::fileline \fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? |
||||||
|
.sp |
||||||
|
class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBchunklen\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinecount\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBregenerate_lines\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBline\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinepayload\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinemeta\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlineinfo\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? |
||||||
|
.sp |
||||||
|
class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR |
||||||
|
.sp |
||||||
|
\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR |
||||||
|
.sp |
||||||
|
.BE |
||||||
|
.SH DESCRIPTION |
||||||
|
.PP |
||||||
|
- |
||||||
|
.SH OVERVIEW |
||||||
|
.PP |
||||||
|
Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||||
|
.PP |
||||||
|
This is important for certain text files where examining the number of chars/bytes is important |
||||||
|
.PP |
||||||
|
For example - windows \&.cmd/\&.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved\&. |
||||||
|
.PP |
||||||
|
Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem\&. It is for operating on text-file like data\&. |
||||||
|
.SS CONCEPTS |
||||||
|
.PP |
||||||
|
A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation\&. |
||||||
|
.CS |
||||||
|
|
||||||
|
|
||||||
|
package require punk::fileline |
||||||
|
package require fileutil |
||||||
|
set rawdata [fileutil::cat data\&.txt -translation binary] |
||||||
|
punk::fileline::class::textinfo create obj_data $rawdata |
||||||
|
puts stdout [obj_data linecount] |
||||||
|
|
||||||
|
.CE |
||||||
|
.SS NOTES |
||||||
|
.PP |
||||||
|
Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files\&. |
||||||
|
.PP |
||||||
|
This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired\&. |
||||||
|
.PP |
||||||
|
No support for lone carriage-returns being interpreted as line-endings\&. |
||||||
|
.PP |
||||||
|
CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module\&. |
||||||
|
.SS DEPENDENCIES |
||||||
|
.PP |
||||||
|
packages used by punk::fileline |
||||||
|
.IP \(bu |
||||||
|
\fBTcl 8\&.6\fR |
||||||
|
.PP |
||||||
|
.SH API |
||||||
|
.SS "NAMESPACE PUNK::FILELINE::CLASS" |
||||||
|
.PP |
||||||
|
class definitions |
||||||
|
.IP [1] |
||||||
|
CLASS \fBtextinfo\fR |
||||||
|
.RS |
||||||
|
.sp |
||||||
|
\fIMETHODS\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? |
||||||
|
.sp |
||||||
|
Constructor for textinfo object which represents a chunk or all of a file |
||||||
|
.sp |
||||||
|
datachunk should be passed with the file data including line-endings as-is for full functionality\&. ie use something like: |
||||||
|
.CS |
||||||
|
|
||||||
|
|
||||||
|
fconfigure $fd -translation binary |
||||||
|
set chunkdata [read $fd] |
||||||
|
or |
||||||
|
set chunkdata [fileutil::cat <filename> -translation binary] |
||||||
|
|
||||||
|
.CE |
||||||
|
.sp |
||||||
|
when loading the data |
||||||
|
.TP |
||||||
|
class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR |
||||||
|
.sp |
||||||
|
Return a range of bytes from the underlying raw chunk data\&. |
||||||
|
.sp |
||||||
|
e\&.g The following retrieves the entire chunk |
||||||
|
.sp |
||||||
|
objName chunk 0 end |
||||||
|
.TP |
||||||
|
class::textinfo \fBchunklen\fR |
||||||
|
.sp |
||||||
|
Number of bytes/characters in the raw data of the file |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinecount\fR |
||||||
|
.sp |
||||||
|
Number of lines in the raw data of the file, counted as per the policy in effect |
||||||
|
.TP |
||||||
|
class::textinfo \fBregenerate_lines\fR |
||||||
|
.sp |
||||||
|
generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex |
||||||
|
.TP |
||||||
|
class::textinfo \fBline\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata |
||||||
|
.sp |
||||||
|
A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) |
||||||
|
.sp |
||||||
|
Whilst such data may not conform to definitions (e\&.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" |
||||||
|
.sp |
||||||
|
To return just the data which might more commonly be needed for dealing with lines, use the \fBlinepayload\fR method - which returns the line data minus line-ending |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinepayload\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
Return the text of the line indicated by the zero-based lineindex |
||||||
|
.sp |
||||||
|
The line-ending is not returned in the data - but is still stored against this lineindex |
||||||
|
.sp |
||||||
|
Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the \fBlinemeta\fR method |
||||||
|
.sp |
||||||
|
To retrieve both the line text and metadata in a single call the \fBlineinfo\fR method can be used |
||||||
|
.sp |
||||||
|
To retrieve an entire line including line-ending use the \fBline\fR method\&. |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinemeta\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
Return a dict of the metadata for the line indicated by the zero-based lineindex |
||||||
|
.sp |
||||||
|
Keys returned include |
||||||
|
.RS |
||||||
|
.IP \(bu |
||||||
|
le |
||||||
|
.sp |
||||||
|
A string representing the type of line-ending: crlf|lf|none |
||||||
|
.IP \(bu |
||||||
|
linelen |
||||||
|
.sp |
||||||
|
The number of characters/bytes in the whole line including line-ending if any |
||||||
|
.IP \(bu |
||||||
|
payloadlen |
||||||
|
.sp |
||||||
|
The number of characters/bytes in the line excluding line-ending |
||||||
|
.IP \(bu |
||||||
|
start |
||||||
|
.sp |
||||||
|
The zero-based index into the associated raw file data indicating at which byte/character index this line begins |
||||||
|
.IP \(bu |
||||||
|
end |
||||||
|
.sp |
||||||
|
The zero-based index into the associated raw file data indicating at which byte/character index this line ends |
||||||
|
.sp |
||||||
|
This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload |
||||||
|
.RE |
||||||
|
.TP |
||||||
|
class::textinfo \fBlineinfo\fR \fIlineindex\fR |
||||||
|
.sp |
||||||
|
Return a dict of the metadata and text for the line indicated by the zero-based lineindex |
||||||
|
.sp |
||||||
|
This returns the same info as the \fBlinemeta\fR method with an added key of 'payload' which is the text of the line without line-ending\&. |
||||||
|
.sp |
||||||
|
The 'payload' value is the same as is returned from the \fBlinepayload\fR method\&. |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR |
||||||
|
.TP |
||||||
|
class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? |
||||||
|
.sp |
||||||
|
Return a list of dicts each with structure like the result of the \fBlineinfo\fR method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied |
||||||
|
.sp |
||||||
|
The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list\&. |
||||||
|
.sp |
||||||
|
truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) |
||||||
|
.sp |
||||||
|
Note that this truncation info is only in the return value of this method - and will not be reflected in \fBlineinfo\fR queries to the main chunk\&. |
||||||
|
.TP |
||||||
|
class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data |
||||||
|
.sp |
||||||
|
This is used internally by API functions such as \fBline\fR to enable it to accept more expressive indices |
||||||
|
.TP |
||||||
|
class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR |
||||||
|
.sp |
||||||
|
A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data |
||||||
|
.TP |
||||||
|
class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR |
||||||
|
.sp |
||||||
|
A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max |
||||||
|
.sp |
||||||
|
Basic addition and subtraction expressions such as 4-1 5+2 are accepted |
||||||
|
.sp |
||||||
|
startidx higher than endidx is allowed |
||||||
|
.sp |
||||||
|
Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max |
||||||
|
.RE |
||||||
|
.PP |
||||||
|
.SS "NAMESPACE PUNK::FILELINE" |
||||||
|
.PP |
||||||
|
Core API functions for punk::fileline |
||||||
|
.PP |
||||||
|
.SS "NAMESPACE PUNK::FILELINE::LIB" |
||||||
|
.PP |
||||||
|
Secondary functions that are part of the API |
||||||
|
.TP |
||||||
|
\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR |
||||||
|
.sp |
||||||
|
Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range\&. |
||||||
|
.RS |
||||||
|
.TP |
||||||
|
integer \fIstart\fR |
||||||
|
.sp |
||||||
|
zero-based start index of range |
||||||
|
.TP |
||||||
|
integer \fIend\fR |
||||||
|
.sp |
||||||
|
zero-based end index of range |
||||||
|
.TP |
||||||
|
integer \fIchunksize\fR |
||||||
|
.sp |
||||||
|
Number of bytes/characters in chunk |
||||||
|
.RE |
||||||
|
.sp |
||||||
|
returns a dict with the keys is_span and boundaries |
||||||
|
.sp |
||||||
|
is_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||||
|
.sp |
||||||
|
boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize |
||||||
|
.sp |
||||||
|
e\&.g |
||||||
|
.CS |
||||||
|
|
||||||
|
|
||||||
|
range_spans_chunk_boundaries 10 1750 512 |
||||||
|
is_span 1 boundaries {512 1024 1536} |
||||||
|
|
||||||
|
.CE |
||||||
|
.sp |
||||||
|
This function automatically uses lseq (if Tcl >= 8\&.7) when number of boundaries spanned is approximately greater than 75 |
||||||
|
.PP |
||||||
|
.SH INTERNAL |
||||||
|
.SS "NAMESPACE PUNK::FILELINE::SYSTEM" |
||||||
|
.PP |
||||||
|
Internal functions that are not part of the API |
||||||
|
.SH KEYWORDS |
||||||
|
file, module, parse, text |
||||||
|
.SH COPYRIGHT |
||||||
|
.nf |
||||||
|
Copyright (c) 2024 |
||||||
|
|
||||||
|
.fi |
@ -1 +1 @@ |
|||||||
{shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}}} {{changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {shell doc/files/project_intro.md punkshell__project_intro} . {punk doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/main.md punkshell} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {repl doc/files/main.md punkshell} . 
{capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} |
{file {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}}} {{shell doc/files/project_changes.md punkshell__project_changes} . {changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . 
{module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {punk doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_intro.md punkshell__project_intro} . {parse doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {punk doc/files/main.md punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {file doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/main.md punkshell} . {capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} |
@ -1 +1 @@ |
|||||||
doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} |
doc {doc/toc {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} |
@ -1 +1 @@ |
|||||||
kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} kw,punk {index.md punk} 
sa,punkshell__project_intro(n) doc/files/project_intro.md sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} punk {index.md punk} module {index.md module} plugin {index.md plugin} |
kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md kw,parse {index.md parse} sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md kw,file {index.md file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} file {index.md file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} kw,text {index.md text} parse {index.md parse} sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md 
punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md kw,punk {index.md punk} sa,punkshell__project_intro(n) doc/files/project_intro.md text {index.md text} sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} module {index.md module} punk {index.md punk} plugin {index.md plugin} |
@ -0,0 +1,353 @@ |
|||||||
|
|
||||||
|
[//000000001]: # (punkshell\_module\_punk::fileline \- punk fileline) |
||||||
|
[//000000002]: # (Generated from file '\_module\_fileline\-0\.1\.0\.tm\.man' by tcllib/doctools with format 'markdown') |
||||||
|
[//000000003]: # (Copyright © 2024) |
||||||
|
[//000000004]: # (punkshell\_module\_punk::fileline\(0\) 0\.1\.0 doc "punk fileline") |
||||||
|
|
||||||
|
<hr> [ <a href="../../../toc.md">Main Table Of Contents</a> | <a |
||||||
|
href="../../toc.md">Table Of Contents</a> | <a |
||||||
|
href="../../../index.md">Keyword Index</a> ] <hr> |
||||||
|
|
||||||
|
# NAME |
||||||
|
|
||||||
|
punkshell\_module\_punk::fileline \- file line\-handling utilities |
||||||
|
|
||||||
|
# <a name='toc'></a>Table Of Contents |
||||||
|
|
||||||
|
- [Table Of Contents](#toc) |
||||||
|
|
||||||
|
- [Synopsis](#synopsis) |
||||||
|
|
||||||
|
- [Description](#section1) |
||||||
|
|
||||||
|
- [Overview](#section2) |
||||||
|
|
||||||
|
- [Concepts](#subsection1) |
||||||
|
|
||||||
|
- [Notes](#subsection2) |
||||||
|
|
||||||
|
- [dependencies](#subsection3) |
||||||
|
|
||||||
|
- [API](#section3) |
||||||
|
|
||||||
|
- [Namespace punk::fileline::class](#subsection4) |
||||||
|
|
||||||
|
- [Namespace punk::fileline](#subsection5) |
||||||
|
|
||||||
|
- [Namespace punk::fileline::lib](#subsection6) |
||||||
|
|
||||||
|
- [Internal](#section4) |
||||||
|
|
||||||
|
- [Namespace punk::fileline::system](#subsection7) |
||||||
|
|
||||||
|
- [Keywords](#keywords) |
||||||
|
|
||||||
|
- [Copyright](#copyright) |
||||||
|
|
||||||
|
# <a name='synopsis'></a>SYNOPSIS |
||||||
|
|
||||||
|
package require punk::fileline |
||||||
|
|
||||||
|
[class::textinfo __constructor__ *datachunk* ?option value\.\.\.?](#1) |
||||||
|
[class::textinfo __chunk__ *chunkstart* *chunkend*](#2) |
||||||
|
[class::textinfo __chunklen__](#3) |
||||||
|
[class::textinfo __linecount__](#4) |
||||||
|
[class::textinfo __regenerate\_lines__](#5) |
||||||
|
[class::textinfo __line__ *lineindex*](#6) |
||||||
|
[class::textinfo __linepayload__ *lineindex*](#7) |
||||||
|
[class::textinfo __linemeta__ *lineindex*](#8) |
||||||
|
[class::textinfo __lineinfo__ *lineindex*](#9) |
||||||
|
[class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx*](#10) |
||||||
|
[class::textinfo __linerange\_to\_chunk__ *startidx* *endidx*](#11) |
||||||
|
[class::textinfo __lines__ *startidx* *endidx*](#12) |
||||||
|
[class::textinfo __linepayloads__ *startidx* *endidx*](#13) |
||||||
|
[class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend*](#14) |
||||||
|
[class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.?](#15) |
||||||
|
[class::textinfo __numeric\_linerange__ *startidx* *endidx*](#16) |
||||||
|
[class::textinfo __numeric\_chunkrange__ *startidx* *endidx*](#17) |
||||||
|
[class::textinfo __normalize\_indices__ *startidx* *endidx* *max*](#18) |
||||||
|
[__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize*](#19) |
||||||
|
|
||||||
|
# <a name='description'></a>DESCRIPTION |
||||||
|
|
||||||
|
\- |
||||||
|
|
||||||
|
# <a name='section2'></a>Overview |
||||||
|
|
||||||
|
Utilities for in\-memory analysis of text file data as both line data and |
||||||
|
byte/char\-counted data whilst preserving the line\-endings \(even if mixed\) |
||||||
|
|
||||||
|
This is important for certain text files where examining the number of |
||||||
|
chars/bytes is important |
||||||
|
|
||||||
|
For example \- windows \.cmd/\.bat files need some byte counting to determine if |
||||||
|
labels lie on chunk boundaries and need to be moved\. |
||||||
|
|
||||||
|
Despite including the word 'file', the library doesn't deal with reading/writing |
||||||
|
to the filesystem\. It is for operating on text\-file like data\. |
||||||
|
|
||||||
|
## <a name='subsection1'></a>Concepts |
||||||
|
|
||||||
|
A chunk of textfile data \(possibly representing a whole file \- but usually at |
||||||
|
least a complete set of lines\) is loaded into a punk::fileline::class::textinfo |
||||||
|
instance at object creation\. |
||||||
|
|
||||||
|
package require punk::fileline |
||||||
|
package require fileutil |
||||||
|
set rawdata [fileutil::cat data.txt -translation binary] |
||||||
|
punk::fileline::class::textinfo create obj_data $rawdata |
||||||
|
puts stdout [obj_data linecount] |
||||||
|
|
||||||
|
## <a name='subsection2'></a>Notes |
||||||
|
|
||||||
|
Line records are referred to by a zero\-based index instead of a one\-based index |
||||||
|
as is commonly used when displaying files\. |
||||||
|
|
||||||
|
This is for programming consistency and convenience, and the module user should |
||||||
|
do their own conversion to one\-based indexing for line display or messaging if |
||||||
|
desired\. |
||||||
|
|
||||||
|
No support for lone carriage\-returns being interpreted as line\-endings\. |
||||||
|
|
||||||
|
CR line\-endings that are intended to be interpreted as such should be mapped to |
||||||
|
something else before the data is supplied to this module\. |
||||||
|
|
||||||
|
## <a name='subsection3'></a>dependencies |
||||||
|
|
||||||
|
packages used by punk::fileline |
||||||
|
|
||||||
|
- __Tcl 8\.6__ |
||||||
|
|
||||||
|
# <a name='section3'></a>API |
||||||
|
|
||||||
|
## <a name='subsection4'></a>Namespace punk::fileline::class |
||||||
|
|
||||||
|
class definitions |
||||||
|
|
||||||
|
1. CLASS __textinfo__ |
||||||
|
|
||||||
|
- <a name='1'></a>class::textinfo __constructor__ *datachunk* ?option value\.\.\.? |
||||||
|
|
||||||
|
*METHODS* |
||||||
|
|
||||||
|
Constructor for textinfo object which represents a chunk or all of a |
||||||
|
file |
||||||
|
|
||||||
|
datachunk should be passed with the file data including line\-endings |
||||||
|
as\-is for full functionality\. ie use something like: |
||||||
|
|
||||||
|
fconfigure $fd -translation binary |
||||||
|
set chunkdata [read $fd] |
||||||
|
or |
||||||
|
set chunkdata [fileutil::cat <filename> -translation binary] |
||||||
|
|
||||||
|
when loading the data |
||||||
|
|
||||||
|
- <a name='2'></a>class::textinfo __chunk__ *chunkstart* *chunkend* |
||||||
|
|
||||||
|
Return a range of bytes from the underlying raw chunk data\. |
||||||
|
|
||||||
|
e\.g The following retrieves the entire chunk |
||||||
|
|
||||||
|
objName chunk 0 end |
||||||
|
|
||||||
|
- <a name='3'></a>class::textinfo __chunklen__ |
||||||
|
|
||||||
|
Number of bytes/characters in the raw data of the file |
||||||
|
|
||||||
|
- <a name='4'></a>class::textinfo __linecount__ |
||||||
|
|
||||||
|
Number of lines in the raw data of the file, counted as per the policy |
||||||
|
in effect |
||||||
|
|
||||||
|
- <a name='5'></a>class::textinfo __regenerate\_lines__ |
||||||
|
|
||||||
|
generate a list of lines from the stored raw data chunk and keep a map |
||||||
|
of line\-endings indexed by lineindex |
||||||
|
|
||||||
|
- <a name='6'></a>class::textinfo __line__ *lineindex* |
||||||
|
|
||||||
|
Reconstructs and returns the raw line using the payload and per\-line |
||||||
|
stored line\-ending metadata |
||||||
|
|
||||||
|
A 'line' may be returned without a line\-ending if the underlying chunk |
||||||
|
had trailing data without a line\-ending \(or the chunk was loaded under |
||||||
|
a non\-standard \-policy setting\) |
||||||
|
|
||||||
|
Whilst such data may not conform to definitions \(e\.g POSIX\) of the |
||||||
|
terms 'textfile' and 'line' \- it is useful here to represent it as a |
||||||
|
line with metadata le set to "none" |
||||||
|
|
||||||
|
To return just the data which might more commonly be needed for dealing |
||||||
|
with lines, use the __linepayload__ method \- which returns the line |
||||||
|
data minus line\-ending |
||||||
|
|
||||||
|
- <a name='7'></a>class::textinfo __linepayload__ *lineindex* |
||||||
|
|
||||||
|
Return the text of the line indicated by the zero\-based lineindex |
||||||
|
|
||||||
|
The line\-ending is not returned in the data \- but is still stored |
||||||
|
against this lineindex |
||||||
|
|
||||||
|
Line Metadata such as the line\-ending for a particular line and the |
||||||
|
byte/character range it occupies within the chunk can be retrieved with |
||||||
|
the __linemeta__ method |
||||||
|
|
||||||
|
To retrieve both the line text and metadata in a single call the |
||||||
|
__lineinfo__ method can be used |
||||||
|
|
||||||
|
To retrieve an entire line including line\-ending use the __line__ |
||||||
|
method\. |
||||||
|
|
||||||
|
- <a name='8'></a>class::textinfo __linemeta__ *lineindex* |
||||||
|
|
||||||
|
Return a dict of the metadata for the line indicated by the zero\-based |
||||||
|
lineindex |
||||||
|
|
||||||
|
Keys returned include |
||||||
|
|
||||||
|
* le |
||||||
|
|
||||||
|
A string representing the type of line\-ending: crlf|lf|none |
||||||
|
|
||||||
|
* linelen |
||||||
|
|
||||||
|
The number of characters/bytes in the whole line including |
||||||
|
line\-ending if any |
||||||
|
|
||||||
|
* payloadlen |
||||||
|
|
||||||
|
The number of characters/bytes in the line excluding line\-ending |
||||||
|
|
||||||
|
* start |
||||||
|
|
||||||
|
The zero\-based index into the associated raw file data indicating |
||||||
|
at which byte/character index this line begins |
||||||
|
|
||||||
|
* end |
||||||
|
|
||||||
|
The zero\-based index into the associated raw file data indicating |
||||||
|
at which byte/character index this line ends |
||||||
|
|
||||||
|
This end\-point corresponds to the last character of the line\-ending |
||||||
|
if any \- not necessarily the last character of the line's payload |
||||||
|
|
||||||
|
- <a name='9'></a>class::textinfo __lineinfo__ *lineindex* |
||||||
|
|
||||||
|
Return a dict of the metadata and text for the line indicated by the |
||||||
|
zero\-based lineindex |
||||||
|
|
||||||
|
This returns the same info as the __linemeta__ method with an added key of |
||||||
|
'payload' which is the text of the line without line\-ending\. |
||||||
|
|
||||||
|
The 'payload' value is the same as is returned from the |
||||||
|
__linepayload__ method\. |
||||||
|
|
||||||
|
- <a name='10'></a>class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx* |
||||||
|
|
||||||
|
- <a name='11'></a>class::textinfo __linerange\_to\_chunk__ *startidx* *endidx* |
||||||
|
|
||||||
|
- <a name='12'></a>class::textinfo __lines__ *startidx* *endidx* |
||||||
|
|
||||||
|
- <a name='13'></a>class::textinfo __linepayloads__ *startidx* *endidx* |
||||||
|
|
||||||
|
- <a name='14'></a>class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend* |
||||||
|
|
||||||
|
- <a name='15'></a>class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.? |
||||||
|
|
||||||
|
Return a list of dicts each with structure like the result of the |
||||||
|
__lineinfo__ method \- but possibly with extra keys for truncation |
||||||
|
information if \-show\_truncated 1 is supplied |
||||||
|
|
||||||
|
The truncation key in a lineinfo dict may be returned for first and/or |
||||||
|
last line in the resulting list\. |
||||||
|
|
||||||
|
truncation shows the shortened \(missing bytes on left and/or right |
||||||
|
side\) part of the entire line \(potentially including line\-ending or |
||||||
|
even partial line\-ending\) |
||||||
|
|
||||||
|
Note that this truncation info is only in the return value of this |
||||||
|
method \- and will not be reflected in __lineinfo__ queries to the |
||||||
|
main chunk\. |
||||||
|
|
||||||
|
- <a name='16'></a>class::textinfo __numeric\_linerange__ *startidx* *endidx* |
||||||
|
|
||||||
|
A helper to return any Tcl\-style end end\-x values given to startidx or |
||||||
|
endidx; converted to their specific values based on the current state |
||||||
|
of the underlying line data |
||||||
|
|
||||||
|
This is used internally by API functions such as __line__ to enable |
||||||
|
it to accept more expressive indices |
||||||
|
|
||||||
|
- <a name='17'></a>class::textinfo __numeric\_chunkrange__ *startidx* *endidx* |
||||||
|
|
||||||
|
A helper to return any Tcl\-style end end\-x entries supplied to startidx |
||||||
|
or endidx; converted to their specific values based on the current |
||||||
|
state of the underlying chunk data |
||||||
|
|
||||||
|
- <a name='18'></a>class::textinfo __normalize\_indices__ *startidx* *endidx* *max* |
||||||
|
|
||||||
|
A utility to convert some of the Tcl\-style list\-index expressions |
||||||
|
such as end, end\-1 etc to valid indices in the range 0 to the supplied |
||||||
|
max |
||||||
|
|
||||||
|
Basic addition and subtraction expressions such as 4\-1 5\+2 are accepted |
||||||
|
|
||||||
|
startidx higher than endidx is allowed |
||||||
|
|
||||||
|
Unlike Tcl's index expressions \- we raise an error if the calculated |
||||||
|
index is out of bounds 0 to max |
||||||
|
|
||||||
|
## <a name='subsection5'></a>Namespace punk::fileline |
||||||
|
|
||||||
|
Core API functions for punk::fileline |
||||||
|
|
||||||
|
## <a name='subsection6'></a>Namespace punk::fileline::lib |
||||||
|
|
||||||
|
- <a name='19'></a>__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize* |
||||||
|
|
||||||
|
Takes start and end offset, generally representing bytes or character |
||||||
|
indices, and computes a list of boundaries at multiples of the chunksize |
||||||
|
that are spanned by the start and end range\. |
||||||
|
|
||||||
|
* integer *start* |
||||||
|
|
||||||
|
zero\-based start index of range |
||||||
|
|
||||||
|
* integer *end* |
||||||
|
|
||||||
|
zero\-based end index of range |
||||||
|
|
||||||
|
* integer *chunksize* |
||||||
|
|
||||||
|
Number of bytes/characters in chunk |
||||||
|
|
||||||
|
returns a dict with the keys is\_span and boundaries |
||||||
|
|
||||||
|
is\_span 0|1 indicates if the range specified spans a boundary of chunksize |
||||||
|
|
||||||
|
boundaries contains a list of the spanned boundaries \- which are always |
||||||
|
multiples of the chunksize |
||||||
|
|
||||||
|
e\.g |
||||||
|
|
||||||
|
range_spans_chunk_boundaries 10 1750 512 |
||||||
|
is_span 1 boundaries {512 1024 1536} |
||||||
|
|
||||||
|
This function automatically uses lseq \(if Tcl >= 8\.7\) when the number of |
||||||
|
boundaries spanned is approximately greater than 75 |
||||||
|
|
||||||
|
# <a name='section4'></a>Internal |
||||||
|
|
||||||
|
## <a name='subsection7'></a>Namespace punk::fileline::system |
||||||
|
|
||||||
|
Internal functions that are not part of the API |
||||||
|
|
||||||
|
# <a name='keywords'></a>KEYWORDS |
||||||
|
|
||||||
|
[file](\.\./\.\./\.\./index\.md\#file), [module](\.\./\.\./\.\./index\.md\#module), |
||||||
|
[parse](\.\./\.\./\.\./index\.md\#parse), [text](\.\./\.\./\.\./index\.md\#text) |
||||||
|
|
||||||
|
# <a name='copyright'></a>COPYRIGHT |
||||||
|
|
||||||
|
Copyright © 2024 |
@ -1 +1 @@ |
|||||||
{shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}}} {{repl doc/files/main.html punkshell} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {repl doc/files/project_intro.html punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {punk doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_intro.html punkshell__project_intro} . 
{punk doc/files/main.html punkshell} . {repl doc/files/project_changes.html punkshell__project_changes} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} |
{file {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}}} {{repl doc/files/main.html punkshell} . {file doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {shell doc/files/project_changes.html punkshell__project_changes} . {changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {repl doc/files/project_intro.html punkshell__project_intro} . 
{module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {shell doc/files/project_intro.html punkshell__project_intro} . {punk doc/files/project_changes.html punkshell__project_changes} . {parse doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/main.html punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {repl doc/files/project_changes.html punkshell__project_changes} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} |
@ -1 +1 @@ |
|||||||
doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} |
doc {doc/toc {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} |
@ -1 +1 @@ |
|||||||
kw,capability {index.html capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} 
doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} kw,punk {index.html punk} sa,punkshell__project_intro(n) doc/files/project_intro.html sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} punk {index.html punk} module {index.html module} plugin {index.html plugin} |
kw,capability {index.html capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html kw,parse {index.html parse} sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html kw,file {index.html file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} file {index.html file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} kw,text {index.html text} parse {index.html parse} 
sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html kw,punk {index.html punk} sa,punkshell__project_intro(n) doc/files/project_intro.html text {index.html text} sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} module {index.html module} punk {index.html punk} plugin {index.html plugin} |
@ -0,0 +1,326 @@ |
|||||||
|
<!DOCTYPE html><html><head> |
||||||
|
<title>punkshell_module_punk::fileline - punk fileline</title> |
||||||
|
<style type="text/css"><!-- |
||||||
|
HTML { |
||||||
|
background: #FFFFFF; |
||||||
|
color: black; |
||||||
|
} |
||||||
|
BODY { |
||||||
|
background: #FFFFFF; |
||||||
|
color: black; |
||||||
|
} |
||||||
|
DIV.doctools { |
||||||
|
margin-left: 10%; |
||||||
|
margin-right: 10%; |
||||||
|
} |
||||||
|
DIV.doctools H1,DIV.doctools H2 { |
||||||
|
margin-left: -5%; |
||||||
|
} |
||||||
|
H1, H2, H3, H4 { |
||||||
|
margin-top: 1em; |
||||||
|
font-family: sans-serif; |
||||||
|
font-size: large; |
||||||
|
color: #005A9C; |
||||||
|
background: transparent; |
||||||
|
text-align: left; |
||||||
|
} |
||||||
|
H1.doctools_title { |
||||||
|
text-align: center; |
||||||
|
} |
||||||
|
UL,OL { |
||||||
|
margin-right: 0em; |
||||||
|
margin-top: 3pt; |
||||||
|
margin-bottom: 3pt; |
||||||
|
} |
||||||
|
UL LI { |
||||||
|
list-style: disc; |
||||||
|
} |
||||||
|
OL LI { |
||||||
|
list-style: decimal; |
||||||
|
} |
||||||
|
DT { |
||||||
|
padding-top: 1ex; |
||||||
|
} |
||||||
|
UL.doctools_toc,UL.doctools_toc UL, UL.doctools_toc UL UL { |
||||||
|
font: normal 12pt/14pt sans-serif; |
||||||
|
list-style: none; |
||||||
|
} |
||||||
|
LI.doctools_section, LI.doctools_subsection { |
||||||
|
list-style: none; |
||||||
|
margin-left: 0em; |
||||||
|
text-indent: 0em; |
||||||
|
padding: 0em; |
||||||
|
} |
||||||
|
PRE { |
||||||
|
display: block; |
||||||
|
font-family: monospace; |
||||||
|
white-space: pre; |
||||||
|
margin: 0%; |
||||||
|
padding-top: 0.5ex; |
||||||
|
padding-bottom: 0.5ex; |
||||||
|
padding-left: 1ex; |
||||||
|
padding-right: 1ex; |
||||||
|
width: 100%; |
||||||
|
} |
||||||
|
PRE.doctools_example { |
||||||
|
color: black; |
||||||
|
background: #f5dcb3; |
||||||
|
border: 1px solid black; |
||||||
|
} |
||||||
|
UL.doctools_requirements LI, UL.doctools_syntax LI { |
||||||
|
list-style: none; |
||||||
|
margin-left: 0em; |
||||||
|
text-indent: 0em; |
||||||
|
padding: 0em; |
||||||
|
} |
||||||
|
DIV.doctools_synopsis { |
||||||
|
color: black; |
||||||
|
background: #80ffff; |
||||||
|
border: 1px solid black; |
||||||
|
font-family: serif; |
||||||
|
margin-top: 1em; |
||||||
|
margin-bottom: 1em; |
||||||
|
} |
||||||
|
UL.doctools_syntax { |
||||||
|
margin-top: 1em; |
||||||
|
border-top: 1px solid black; |
||||||
|
} |
||||||
|
UL.doctools_requirements { |
||||||
|
margin-bottom: 1em; |
||||||
|
border-bottom: 1px solid black; |
||||||
|
} |
||||||
|
--></style> |
||||||
|
</head> |
||||||
|
<!-- Generated from file '_module_fileline-0.1.0.tm.man' by tcllib/doctools with format 'html' |
||||||
|
--> |
||||||
|
<!-- Copyright &copy; 2024 |
||||||
|
--> |
||||||
|
<!-- punkshell_module_punk::fileline.0 |
||||||
|
--> |
||||||
|
<body><hr> [ |
||||||
|
<a href="../../../toc.html">Main Table Of Contents</a> |
||||||
|
| <a href="../../toc.html">Table Of Contents</a> |
||||||
|
| <a href="../../../index.html">Keyword Index</a> |
||||||
|
] <hr> |
||||||
|
<div class="doctools"> |
||||||
|
<h1 class="doctools_title">punkshell_module_punk::fileline(0) 0.1.0 doc "punk fileline"</h1> |
||||||
|
<div id="name" class="doctools_section"><h2><a name="name">Name</a></h2> |
||||||
|
<p>punkshell_module_punk::fileline - file line-handling utilities</p> |
||||||
|
</div> |
||||||
|
<div id="toc" class="doctools_section"><h2><a name="toc">Table Of Contents</a></h2> |
||||||
|
<ul class="doctools_toc"> |
||||||
|
<li class="doctools_section"><a href="#toc">Table Of Contents</a></li> |
||||||
|
<li class="doctools_section"><a href="#synopsis">Synopsis</a></li> |
||||||
|
<li class="doctools_section"><a href="#section1">Description</a></li> |
||||||
|
<li class="doctools_section"><a href="#section2">Overview</a> |
||||||
|
<ul> |
||||||
|
<li class="doctools_subsection"><a href="#subsection1">Concepts</a></li> |
||||||
|
<li class="doctools_subsection"><a href="#subsection2">Notes</a></li> |
||||||
|
<li class="doctools_subsection"><a href="#subsection3">dependencies</a></li> |
||||||
|
</ul> |
||||||
|
</li> |
||||||
|
<li class="doctools_section"><a href="#section3">API</a> |
||||||
|
<ul> |
||||||
|
<li class="doctools_subsection"><a href="#subsection4">Namespace punk::fileline::class</a></li> |
||||||
|
<li class="doctools_subsection"><a href="#subsection5">Namespace punk::fileline</a></li> |
||||||
|
<li class="doctools_subsection"><a href="#subsection6">Namespace punk::fileline::lib</a></li> |
||||||
|
</ul> |
||||||
|
</li> |
||||||
|
<li class="doctools_section"><a href="#section4">Internal</a> |
||||||
|
<ul> |
||||||
|
<li class="doctools_subsection"><a href="#subsection7">Namespace punk::fileline::system</a></li> |
||||||
|
</ul> |
||||||
|
</li> |
||||||
|
<li class="doctools_section"><a href="#keywords">Keywords</a></li> |
||||||
|
<li class="doctools_section"><a href="#copyright">Copyright</a></li> |
||||||
|
</ul> |
||||||
|
</div> |
||||||
|
<div id="synopsis" class="doctools_section"><h2><a name="synopsis">Synopsis</a></h2> |
||||||
|
<div class="doctools_synopsis"> |
||||||
|
<ul class="doctools_requirements"> |
||||||
|
<li>package require <b class="pkgname">punk::fileline</b></li> |
||||||
|
</ul> |
||||||
|
<ul class="doctools_syntax"> |
||||||
|
<li><a href="#1">class::textinfo <b class="method">constructor</b> <i class="arg">datachunk</i> <span class="opt">?option value...?</span></a></li> |
||||||
|
<li><a href="#2">class::textinfo <b class="method">chunk</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></li> |
||||||
|
<li><a href="#3">class::textinfo <b class="method">chunklen</b></a></li> |
||||||
|
<li><a href="#4">class::textinfo <b class="method">linecount</b></a></li> |
||||||
|
<li><a href="#5">class::textinfo <b class="method">regenerate_lines</b></a></li> |
||||||
|
<li><a href="#6">class::textinfo <b class="method">line</b> <i class="arg">lineindex</i></a></li> |
||||||
|
<li><a href="#7">class::textinfo <b class="method">linepayload</b> <i class="arg">lineindex</i></a></li> |
||||||
|
<li><a href="#8">class::textinfo <b class="method">linemeta</b> <i class="arg">lineindex</i></a></li> |
||||||
|
<li><a href="#9">class::textinfo <b class="method">lineinfo</b> <i class="arg">lineindex</i></a></li> |
||||||
|
<li><a href="#10">class::textinfo <b class="method">linerange_to_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#11">class::textinfo <b class="method">linerange_to_chunk</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#12">class::textinfo <b class="method">lines</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#13">class::textinfo <b class="method">linepayloads</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#14">class::textinfo <b class="method">chunkrange_to_linerange</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></li> |
||||||
|
<li><a href="#15">class::textinfo <b class="method">chunkrange_to_lineinfolist</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i> <span class="opt">?option value...?</span></a></li> |
||||||
|
<li><a href="#16">class::textinfo <b class="method">numeric_linerange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#17">class::textinfo <b class="method">numeric_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></li> |
||||||
|
<li><a href="#18">class::textinfo <b class="method">normalize_indices</b> <i class="arg">startidx</i> <i class="arg">endidx</i> <i class="arg">max</i></a></li> |
||||||
|
<li><a href="#19"><b class="function">lib::range_spans_chunk_boundaries</b> <i class="arg">start</i> <i class="arg">end</i> <i class="arg">chunksize</i></a></li> |
||||||
|
</ul> |
||||||
|
</div> |
||||||
|
</div> |
||||||
|
<div id="section1" class="doctools_section"><h2><a name="section1">Description</a></h2> |
||||||
|
<p>-</p> |
||||||
|
</div> |
||||||
|
<div id="section2" class="doctools_section"><h2><a name="section2">Overview</a></h2> |
||||||
|
<p>Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed)</p> |
||||||
|
<p>This is important for certain text files where examining the number of chars/bytes is important</p> |
||||||
|
<p>For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved.</p> |
||||||
|
<p>Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data.</p> |
||||||
|
<div id="subsection1" class="doctools_subsection"><h3><a name="subsection1">Concepts</a></h3> |
||||||
|
<p>A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation.</p> |
||||||
|
<pre class="doctools_example"> |
||||||
|
package require punk::fileline |
||||||
|
package require fileutil |
||||||
|
set rawdata [fileutil::cat data.txt -translation binary] |
||||||
|
punk::fileline::class::textinfo create obj_data $rawdata |
||||||
|
puts stdout [obj_data linecount] |
||||||
|
</pre> |
||||||
|
</div> |
||||||
|
<div id="subsection2" class="doctools_subsection"><h3><a name="subsection2">Notes</a></h3> |
||||||
|
<p>Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files.</p> |
||||||
|
<p>This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired.</p> |
||||||
|
<p>No support for lone carriage-returns being interpreted as line-endings.</p> |
||||||
|
<p>CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module.</p> |
||||||
|
</div> |
||||||
|
<div id="subsection3" class="doctools_subsection"><h3><a name="subsection3">dependencies</a></h3> |
||||||
|
<p>packages used by punk::fileline</p> |
||||||
|
<ul class="doctools_itemized"> |
||||||
|
<li><p><b class="package">Tcl 8.6</b></p></li> |
||||||
|
</ul> |
||||||
|
</div> |
||||||
|
</div> |
||||||
|
<div id="section3" class="doctools_section"><h2><a name="section3">API</a></h2> |
||||||
|
<div id="subsection4" class="doctools_subsection"><h3><a name="subsection4">Namespace punk::fileline::class</a></h3> |
||||||
|
<p>class definitions</p> |
||||||
|
<ol class="doctools_enumerated"> |
||||||
|
<li><p>CLASS <b class="class">textinfo</b></p> |
||||||
|
<dl class="doctools_definitions"> |
||||||
|
<p><em>METHODS</em></p> |
||||||
|
<dt><a name="1">class::textinfo <b class="method">constructor</b> <i class="arg">datachunk</i> <span class="opt">?option value...?</span></a></dt> |
||||||
|
<dd><p>Constructor for textinfo object which represents a chunk or all of a file</p> |
||||||
|
<p>datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like:</p> |
||||||
|
<pre class="doctools_example"> |
||||||
|
fconfigure $fd -translation binary |
||||||
|
set chunkdata [read $fd] |
||||||
|
or |
||||||
|
set chunkdata [fileutil::cat <filename> -translation binary] |
||||||
|
</pre> |
||||||
|
<p>when loading the data</p></dd> |
||||||
|
<dt><a name="2">class::textinfo <b class="method">chunk</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></dt> |
||||||
|
<dd><p>Return a range of bytes from the underlying raw chunk data.</p> |
||||||
|
<p>e.g The following retrieves the entire chunk</p> |
||||||
|
<p>objName chunk 0 end</p></dd> |
||||||
|
<dt><a name="3">class::textinfo <b class="method">chunklen</b></a></dt> |
||||||
|
<dd><p>Number of bytes/characters in the raw data of the file</p></dd> |
||||||
|
<dt><a name="4">class::textinfo <b class="method">linecount</b></a></dt> |
||||||
|
<dd><p>Number of lines in the raw data of the file, counted as per the policy in effect</p></dd> |
||||||
|
<dt><a name="5">class::textinfo <b class="method">regenerate_lines</b></a></dt> |
||||||
|
<dd><p>generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex</p></dd> |
||||||
|
<dt><a name="6">class::textinfo <b class="method">line</b> <i class="arg">lineindex</i></a></dt> |
||||||
|
<dd><p>Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata</p> |
||||||
|
<p>A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)</p> |
||||||
|
<p>Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"</p> |
||||||
|
<p>To return just the data which might more commonly be needed for dealing with lines, use the <b class="method">linepayload</b> method - which returns the line data minus line-ending</p></dd> |
||||||
|
<dt><a name="7">class::textinfo <b class="method">linepayload</b> <i class="arg">lineindex</i></a></dt> |
||||||
|
<dd><p>Return the text of the line indicated by the zero-based lineindex</p> |
||||||
|
<p>The line-ending is not returned in the data - but is still stored against this lineindex</p> |
||||||
|
<p>Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the <b class="method">linemeta</b> method</p> |
||||||
|
<p>To retrieve both the line text and metadata in a single call the <b class="method">lineinfo</b> method can be used</p> |
||||||
|
<p>To retrieve an entire line including line-ending use the <b class="method">line</b> method.</p></dd> |
||||||
|
<dt><a name="8">class::textinfo <b class="method">linemeta</b> <i class="arg">lineindex</i></a></dt> |
||||||
|
<dd><p>Return a dict of the metadata for the line indicated by the zero-based lineindex</p> |
||||||
|
<p>Keys returned include</p> |
||||||
|
<ul class="doctools_itemized"> |
||||||
|
<li><p>le</p> |
||||||
|
<p>A string representing the type of line-ending: crlf|lf|none</p></li> |
||||||
|
<li><p>linelen</p> |
||||||
|
<p>The number of characters/bytes in the whole line including line-ending if any</p></li> |
||||||
|
<li><p>payloadlen</p> |
||||||
|
<p>The number of characters/bytes in the line excluding line-ending</p></li> |
||||||
|
<li><p>start</p> |
||||||
|
<p>The zero-based index into the associated raw file data indicating at which byte/character index this line begins</p></li> |
||||||
|
<li><p>end</p> |
||||||
|
<p>The zero-based index into the associated raw file data indicating at which byte/character index this line ends</p> |
||||||
|
<p>This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload</p></li> |
||||||
|
</ul></dd> |
||||||
|
<dt><a name="9">class::textinfo <b class="method">lineinfo</b> <i class="arg">lineindex</i></a></dt> |
||||||
|
<dd><p>Return a dict of the metadata and text for the line indicated by the zero-based lineindex</p> |
||||||
|
<p>This returns the same info as the <b class="method">linemeta</b> with an added key of 'payload' which is the text of the line without line-ending.</p> |
||||||
|
<p>The 'payload' value is the same as is returned from the <b class="method">linepayload</b> method.</p></dd> |
||||||
|
<dt><a name="10">class::textinfo <b class="method">linerange_to_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd></dd> |
||||||
|
<dt><a name="11">class::textinfo <b class="method">linerange_to_chunk</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd></dd> |
||||||
|
<dt><a name="12">class::textinfo <b class="method">lines</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd></dd> |
||||||
|
<dt><a name="13">class::textinfo <b class="method">linepayloads</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd></dd> |
||||||
|
<dt><a name="14">class::textinfo <b class="method">chunkrange_to_linerange</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i></a></dt> |
||||||
|
<dd></dd> |
||||||
|
<dt><a name="15">class::textinfo <b class="method">chunkrange_to_lineinfolist</b> <i class="arg">chunkstart</i> <i class="arg">chunkend</i> <span class="opt">?option value...?</span></a></dt> |
||||||
|
<dd><p>Return a list of dicts each with structure like the result of the <b class="method">lineinfo</b> method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied</p> |
||||||
|
<p>The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.</p> |
||||||
|
<p>truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)</p> |
||||||
|
<p>Note that this truncation info is only in the return value of this method - and will not be reflected in <b class="method">lineinfo</b> queries to the main chunk.</p></dd> |
||||||
|
<dt><a name="16">class::textinfo <b class="method">numeric_linerange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd><p>A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data</p> |
||||||
|
<p>This is used internally by API functions such as <b class="method">line</b> to enable it to accept more expressive indices</p></dd> |
||||||
|
<dt><a name="17">class::textinfo <b class="method">numeric_chunkrange</b> <i class="arg">startidx</i> <i class="arg">endidx</i></a></dt> |
||||||
|
<dd><p>A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data</p></dd> |
||||||
|
<dt><a name="18">class::textinfo <b class="method">normalize_indices</b> <i class="arg">startidx</i> <i class="arg">endidx</i> <i class="arg">max</i></a></dt> |
||||||
|
<dd><p>A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max</p> |
||||||
|
<p>Basic addition and subtraction expressions such as 4-1 5+2 are accepted</p> |
||||||
|
<p>startidx higher than endidx is allowed</p> |
||||||
|
<p>Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max</p></dd> |
||||||
|
</dl> |
||||||
|
</li> |
||||||
|
</ol> |
||||||
|
</div> |
||||||
|
<div id="subsection5" class="doctools_subsection"><h3><a name="subsection5">Namespace punk::fileline</a></h3> |
||||||
|
<p>Core API functions for punk::fileline</p> |
||||||
|
<dl class="doctools_definitions"> |
||||||
|
</dl> |
||||||
|
</div> |
||||||
|
<div id="subsection6" class="doctools_subsection"><h3><a name="subsection6">Namespace punk::fileline::lib</a></h3> |
||||||
|
<p>Secondary functions that are part of the API</p> |
||||||
|
<dl class="doctools_definitions"> |
||||||
|
<dt><a name="19"><b class="function">lib::range_spans_chunk_boundaries</b> <i class="arg">start</i> <i class="arg">end</i> <i class="arg">chunksize</i></a></dt> |
||||||
|
<dd><p>Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.</p> |
||||||
|
<dl class="doctools_arguments"> |
||||||
|
|
||||||
|
<dt>integer <i class="arg">start</i></dt> |
||||||
|
<dd><p>zero-based start index of range</p></dd> |
||||||
|
<dt>integer <i class="arg">end</i></dt> |
||||||
|
<dd><p>zero-based end index of range</p></dd> |
||||||
|
<dt>integer <i class="arg">chunksize</i></dt> |
||||||
|
<dd><p>Number of bytes/characters in chunk</p></dd> |
||||||
|
</dl> |
||||||
|
<p>returns a dict with the keys is_span and boundaries</p> |
||||||
|
<p>is_span 0|1 indicates if the range specified spans a boundary of chunksize</p> |
||||||
|
<p>boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize</p> |
||||||
|
<p>e.g</p> |
||||||
|
<pre class="doctools_example"> |
||||||
|
range_spans_chunk_boundaries 10 1750 512 |
||||||
|
is_span 1 boundaries {512 1024 1536} |
||||||
|
</pre> |
||||||
|
<p>This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75</p></dd> |
||||||
|
</dl> |
||||||
|
</div> |
||||||
|
</div> |
||||||
|
<div id="section4" class="doctools_section"><h2><a name="section4">Internal</a></h2> |
||||||
|
<div id="subsection7" class="doctools_subsection"><h3><a name="subsection7">Namespace punk::fileline::system</a></h3> |
||||||
|
<p>Internal functions that are not part of the API</p> |
||||||
|
</div> |
||||||
|
</div> |
||||||
|
<div id="keywords" class="doctools_section"><h2><a name="keywords">Keywords</a></h2> |
||||||
|
<p><a href="../../../index.html#file">file</a>, <a href="../../../index.html#module">module</a>, <a href="../../../index.html#parse">parse</a>, <a href="../../../index.html#text">text</a></p> |
||||||
|
</div> |
||||||
|
<div id="copyright" class="doctools_section"><h2><a name="copyright">Copyright</a></h2> |
||||||
|
<p>Copyright © 2024</p> |
||||||
|
</div> |
||||||
|
</div></body></html> |
@ -0,0 +1,829 @@ |
|||||||
|
# -*- tcl -*- |
||||||
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
||||||
|
# |
||||||
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
||||||
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
# (C) 2024 |
||||||
|
# |
||||||
|
# @@ Meta Begin |
||||||
|
# Application punk::fileline 999999.0a1.0 |
||||||
|
# Meta platform tcl |
||||||
|
# Meta license BSD |
||||||
|
# @@ Meta End |
||||||
|
|
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
# doctools header |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
#*** !doctools |
||||||
|
#[manpage_begin punkshell_module_punk::fileline 0 999999.0a1.0] |
||||||
|
#[copyright "2024"] |
||||||
|
#[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] |
||||||
|
#[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] |
||||||
|
#[require punk::fileline] |
||||||
|
#[keywords module text parse file] |
||||||
|
#[description] |
||||||
|
#[para] - |
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[section Overview] |
||||||
|
#[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) |
||||||
|
#[para]This is important for certain text files where examining the number of chars/bytes is important |
||||||
|
#[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. |
||||||
|
#[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. |
||||||
|
#[subsection Concepts] |
||||||
|
#[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. |
||||||
|
#[example_begin] |
||||||
|
# package require punk::fileline |
||||||
|
# package require fileutil |
||||||
|
# set rawdata [lb]fileutil::cat data.txt -translation binary[rb] |
||||||
|
# punk::fileline::class::textinfo create obj_data $rawdata |
||||||
|
# puts stdout [lb]obj_data linecount[rb] |
||||||
|
#[example_end] |
||||||
|
#[subsection Notes] |
||||||
|
#[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. |
||||||
|
#[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. |
||||||
|
#[para]No support for lone carriage-returns being interpreted as line-endings. |
||||||
|
#[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. |
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
## Requirements |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[subsection dependencies] |
||||||
|
#[para] packages used by punk::fileline |
||||||
|
#[list_begin itemized] |
||||||
|
|
||||||
|
package require Tcl 8.6 |
||||||
|
#*** !doctools |
||||||
|
#[item] [package {Tcl 8.6}] |
||||||
|
|
||||||
|
# #package require frobz |
||||||
|
# #*** !doctools |
||||||
|
# #[item] [package {frobz}] |
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[list_end] |
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[section API] |
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
# oo::class namespace |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
namespace eval punk::fileline::class { |
||||||
|
namespace export * |
||||||
|
#*** !doctools |
||||||
|
#[subsection {Namespace punk::fileline::class}] |
||||||
|
#[para] class definitions |
||||||
|
if {[info commands [namespace current]::textinfo] eq ""} { |
||||||
|
#*** !doctools |
||||||
|
#[list_begin enumerated] |
||||||
|
|
||||||
|
# oo::class create interface_sample1 { |
||||||
|
# #*** !doctools |
||||||
|
# #[enum] CLASS [class interface_sample1] |
||||||
|
# #[list_begin definitions] |
||||||
|
|
||||||
|
# method test {arg1} { |
||||||
|
# #*** !doctools |
||||||
|
# #[call class::interface_sample1 [method test] [arg arg1]] |
||||||
|
# #[para] test method |
||||||
|
# puts "test: $arg1" |
||||||
|
# } |
||||||
|
|
||||||
|
# #*** !doctools |
||||||
|
# #[list_end] [comment {-- end definitions interface_sample1}] |
||||||
|
# } |
||||||
|
|
||||||
|
|
||||||
|
#uses zero based indexing. Caller can add 1 for line numbers |
||||||
|
oo::class create [namespace current]::textinfo { |
||||||
|
#*** !doctools |
||||||
|
#[enum] CLASS [class textinfo] |
||||||
|
#[list_begin definitions] |
||||||
|
# [para] [emph METHODS] |
||||||
|
|
||||||
|
variable o_chunk |
||||||
|
variable o_chunk_epoch |
||||||
|
variable o_payloadlist |
||||||
|
variable o_linemap |
||||||
|
variable o_line_epoch |
||||||
|
variable o_LF_C |
||||||
|
variable o_CRLF_C |
||||||
|
|
||||||
|
constructor {datachunk args} {
    #*** !doctools
    #[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]]
    #[para] Constructor for textinfo object which represents a chunk or all of a file
    #[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like:
    #[example_begin]
    # fconfigure $fd -translation binary
    # set chunkdata [lb]read $fd[rb]
    #or
    # set chunkdata [lb]fileutil::cat <filename> -translation binary[rb]
    #[example_end]
    #[para] when loading the data

    # Options:
    #   -substitutionmap       accepted for forward compatibility; not acted upon here
    #                          (review - can be done by caller - or a loadable -policy)
    #   -crlf_lf_placeholders  list of exactly 2 single chars. NOTE: despite the option name,
    #                          element 0 stands in for lf and element 1 for crlf.
    # Raises an error for unknown options, malformed placeholders, or when a
    # placeholder character already occurs in datachunk.
    set o_chunk $datachunk
    set defaults [dict create\
        -substitutionmap {}\
        -crlf_lf_placeholders [list \uFFFF \uFFFE]\
    ]
    set known_opts [dict keys $defaults]
    foreach {k v} $args {
        if {$k ni $known_opts} {
            error "textinfo::constructor error: unknown option '$k'. Known options: $known_opts"
        }
    }
    set opts [dict merge $defaults $args]
    # -- --- --- --- --- --- ---
    set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders]
    # -- --- --- --- --- --- ---

    if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] != 1 || [string length [lindex $opt_crlf_lf_placeholders 1]] != 1} {
        error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars"
    }
    lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C

    # A placeholder that already exists in the data would be indistinguishable from a real
    # line-ending after substitution - so refuse to continue. The lf placeholder is checked first.
    foreach placeholder [list $o_LF_C $o_CRLF_C] le_desc {linefeed carriagereturn-linefeed} position {first second} {
        if {[string first $placeholder $o_chunk] >= 0} {
            set decval [scan $placeholder %c]
            #only echo the character itself when it is printable ascii (32..126) - 127 is DEL
            if {$decval < 32 || $decval >= 127} {
                set char_desc "(decimal value $decval)"
            } else {
                set char_desc "'$placeholder' (decimal value $decval)"
            }
            error "textinfo::constructor error: rawfiledata already contains $le_desc substitution character $char_desc specified as $position element of -crlf_lf_placeholders"
        }
    }
    if {$o_LF_C eq $o_CRLF_C} {
        puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings"
    }
    set o_chunk_epoch "initial"
    set o_line_epoch ""
    my regenerate_lines
}
||||||
|
|
||||||
|
method chunk {chunkstart chunkend} {
    #*** !doctools
    #[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]]
    #[para]Return a range of bytes from the underlying raw chunk data.
    #[para] e.g The following retrieves the entire chunk
    #[para] objName chunk 0 end

    # The indices are passed unmodified to 'string range' on the stored chunk.
    set selected [string range $o_chunk $chunkstart $chunkend]
    return $selected
}
||||||
|
method chunklen {} {
    #*** !doctools
    #[call class::textinfo [method chunklen]]
    #[para] Number of bytes/characters in the raw data of the file

    # Measured with 'string length' on the stored chunk.
    set total [string length $o_chunk]
    return $total
}
||||||
|
method linecount {} {
    #*** !doctools
    #[call class::textinfo [method linecount]]
    #[para] Number of lines in the raw data of the file, counted as per the policy in effect

    # One payload entry is stored per line, so the list length is the line count.
    set line_total [llength $o_payloadlist]
    return $line_total
}
||||||
|
|
||||||
|
|
||||||
|
method line {lineindex} {
    #*** !doctools
    #[call class::textinfo [method line] [arg lineindex]]
    #[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata
    #[para]A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)
    #[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"
    #[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending

    # Resolve 'end', 'end-1' etc to a concrete index (errors if out of bounds).
    set resolved_index [lindex [my numeric_linerange $lineindex 0] 0]

    # Translate the stored line-ending name back into its literal characters.
    set ending_name [dict get $o_linemap $resolved_index le]
    set ending_for [dict create lf \n crlf \r\n none ""]
    set ending [dict get $ending_for $ending_name]
    return "[lindex $o_payloadlist $resolved_index]$ending"
}
||||||
|
method linepayload {lineindex} {
    #*** !doctools
    #[call class::textinfo [method linepayload] [arg lineindex]]
    #[para]Return the text of the line indicated by the zero-based lineindex
    #[para]The line-ending is not returned in the data - but is still stored against this lineindex
    #[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method
    #[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used
    #[para]To retrieve an entire line including line-ending use the [method line] method.

    # Resolve the supplied index expression first; out of bounds indices raise an error there.
    set resolved_index [lindex [my numeric_linerange $lineindex 0] 0]
    set text [lindex $o_payloadlist $resolved_index]
    return $text
}
||||||
|
method linemeta {lineindex} {
    #*** !doctools
    #[call class::textinfo [method linemeta] [arg lineindex]]
    #[para]Return a dict of the metadata for the line indicated by the zero-based lineindex
    #[para]Keys returned include
    #[list_begin itemized]
    #[item] le
    #[para] A string representing the type of line-ending: crlf|lf|none
    #[item] linelen
    #[para] The number of characters/bytes in the whole line including line-ending if any
    #[item] payloadlen
    #[para] The number of character/bytes in the line excluding line-ending
    #[item] start
    #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins
    #[item] end
    #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends
    #[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload
    #[list_end]

    # Resolve the index expression, then hand back the stored metadata dict for that line.
    set resolved_index [lindex [my numeric_linerange $lineindex 0] 0]
    return [dict get $o_linemap $resolved_index]
}
||||||
|
method lineinfo {lineindex} {
    #*** !doctools
    #[call class::textinfo [method lineinfo] [arg lineindex]]
    #[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex
    #[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending.
    #[para]The 'payload' value is the same as is returned from the [method linepayload] method.

    # Convert the supplied index expression (e.g 'end') to a concrete line index.
    set resolved_index [lindex [my numeric_linerange $lineindex 0] 0]

    # Result layout: lineindex first, then the stored metadata keys, then payload.
    set meta [dict get $o_linemap $resolved_index]
    set text [lindex $o_payloadlist $resolved_index]
    return [dict merge [dict create lineindex $resolved_index] $meta [dict create payload $text]]
}
||||||
|
method lineinfolist {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]]
    #[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx

    lassign [my numeric_linerange $startidx $endidx] first_line last_line

    # Query by the chunk offsets of complete lines, so no truncation info is needed.
    set from_offset [dict get $o_linemap $first_line start]
    set to_offset   [dict get $o_linemap $last_line end]
    set infos [my chunkrange_to_lineinfolist $from_offset $to_offset]

    # Sanity check: the result must begin and finish on the requested lines.
    set got_first [dict get [lindex $infos 0] lineindex]
    if {$got_first ne $first_line} {
        error "lineinfolist first lineindex $got_first doesn't match startidx $first_line"
    }
    set got_last [dict get [lindex $infos end] lineindex]
    if {$got_last ne $last_line} {
        error "lineinfolist last lineindex $got_last doesn't match endidx $last_line"
    }
    return $infos
}
||||||
|
|
||||||
|
method linerange_to_chunkrange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]]

    # Returns a 2 element list {chunkstart chunkend}: the offset of the first character of
    # line startidx and the offset of the last character of line endidx (inclusive range).
    lassign [my numeric_linerange $startidx $endidx] first_line last_line
    set from_offset [dict get $o_linemap $first_line start]
    set to_offset   [dict get $o_linemap $last_line end]
    return [list $from_offset $to_offset]
}
||||||
|
method linerange_to_chunk {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]]

    # Returns the raw chunk text covering lines startidx to endidx, located via
    # the offsets reported by linerange_to_chunkrange.
    lassign [my linerange_to_chunkrange $startidx $endidx] from_offset to_offset
    return [string range $o_chunk $from_offset $to_offset]
}
||||||
|
method lines {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method lines] [arg startidx] [arg endidx]]

    # Returns a list of reconstructed raw lines (payload plus literal line-ending)
    # for the inclusive line index range. An empty list results if startidx > endidx.
    lassign [my numeric_linerange $startidx $endidx] first_line last_line
    set ending_for [dict create lf \n crlf \r\n none ""]
    set collected [list]
    for {set n $first_line} {$n <= $last_line} {incr n} {
        set text [lindex $o_payloadlist $n]
        set ending [dict get $ending_for [dict get $o_linemap $n le]]
        lappend collected "${text}${ending}"
    }
    return $collected
}
||||||
|
method linepayloads {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]]

    # Returns the list of payloads (line text without line-endings) for the inclusive
    # line index range startidx to endidx.
    # Indices go through numeric_linerange, as the other line-range methods do, so that
    # end/end-x forms and underscore-separated numbers are resolved and out of bounds
    # indices raise an error instead of being silently accepted by lrange.
    lassign [my numeric_linerange $startidx $endidx] first_line last_line
    return [lrange $o_payloadlist $first_line $last_line]
}
||||||
|
method chunkrange_to_linerange {chunkstart chunkend} {
    #*** !doctools
    #[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]]

    # Returns a 2 element list {firstline lastline}: the indices of the lines whose
    # start..end offsets contain chunkstart and chunkend respectively.
    lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
    set line_total [llength $o_payloadlist]

    # Scan forward from the first line for the one containing chunkstart.
    set linestart -1
    set n 0
    while {$n < $line_total} {
        set meta [dict get $o_linemap $n]
        if {$chunkstart >= [dict get $meta start] && $chunkstart <= [dict get $meta end]} {
            set linestart $n
            break
        }
        incr n
    }
    if {$linestart == -1} {
        error "Line with range in chunk spanning start index $chunkstart not found"
    }

    # Scan backward from the last line for the one containing chunkend.
    set lineend -1
    set n [expr {$line_total - 1}]
    while {$n >= 0} {
        set meta [dict get $o_linemap $n]
        if {$chunkend >= [dict get $meta start] && $chunkend <= [dict get $meta end]} {
            set lineend $n
            break
        }
        incr n -1
    }
    if {$lineend == -1} {
        error "Line with range spanning end index $chunkend not found"
    }
    return [list $linestart $lineend]
}
||||||
|
method chunkrange_to_lineinfolist {chunkstart chunkend args} {
    #*** !doctools
    #[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]]
    #[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied
    #[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.
    #[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)
    #[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk.

    lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend
    set defaults [dict create\
        -show_truncated 0\
    ]
    set known_opts [dict keys $defaults]
    foreach {k v} $args {
        if {$k ni $known_opts} {
            error "chunkrange_to_lineinfolist error: unknown option '$k'. Known options: $known_opts"
        }
    }
    set opts [dict merge $defaults $args]
    # -- --- --- --- --- --- --- ---
    set opt_show_truncated [dict get $opts -show_truncated]
    # -- --- --- --- --- --- --- ---

    set le_map [dict create lf \n crlf \r\n none ""]
    set infolist [list]
    lassign [my chunkrange_to_linerange $chunkstart $chunkend] start_lineindex end_lineindex

    #if -show_truncated
    #  is_truncated 0|1 is added to the first and last items (which may be the same item if chunkrange is entirely within a line)
    #  truncated items also get: truncated, truncatedside (list of left and/or right), truncatedleft and/or truncatedright
    #Middle items always carry is_truncated 0 (retained as-is for compatibility with existing callers).
    #Even if the start/end line is not fully within the chunkrange ie truncated - the 'payload' key will contain the original untruncated data
    #The 'truncated' value is the truncated 'line' (payload plus line-ending chars) rather than the truncated 'payload'
    # - so cutting a crlf line between the cr and the lf gives a truncated value longer than the payload.

    ###########################
    # first line may have payload tail truncated - or just linefeed, or even a split linefeed
    ###########################
    set start_info [dict get $o_linemap $start_lineindex]
    set first [dict create lineindex $start_lineindex {*}$start_info payload [lindex $o_payloadlist $start_lineindex]]
    if {$opt_show_truncated} {
        set line_start [dict get $start_info start]
        if {$chunkstart > $line_start} {
            #there is lhs truncation
            set payload_and_le "[lindex $o_payloadlist $start_lineindex][dict get $le_map [dict get $start_info le]]"
            set split [expr {$chunkstart - $line_start}]
            dict set first truncated [string range $payload_and_le $split end]
            dict set first truncatedside [list left] ;#may have 'right' added below if last line is same as first line
            dict set first truncatedleft [string range $payload_and_le 0 [expr {$split - 1}]]
            dict set first is_truncated 1
        } else {
            dict set first is_truncated 0
        }
    }

    ###########################
    # middle lines if any - no truncation
    ###########################
    #difference in indexes of 1 would only mean 2 items to return - so the loop body doesn't run in that case
    set middle_list [list]
    for {set i [expr {$start_lineindex + 1}]} {$i < $end_lineindex} {incr i} {
        #lineindex is key into main list
        lappend middle_list [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0]
    }

    ###########################
    # tail line may have beginning or all of payload truncated - linefeed may be split if crlf
    # may be same line as first line - in which case truncation at beginning as well
    ###########################
    if {$end_lineindex == $start_lineindex} {
        #same record
        if {$opt_show_truncated && $chunkend < [dict get $start_info end]} {
            #there is rhs truncation - possibly in addition to existing lhs truncation
            set line_start [dict get $start_info start]
            set payload_and_le "[lindex $o_payloadlist $start_lineindex][dict get $le_map [dict get $start_info le]]"
            set rsplit [expr {$chunkend - $line_start}]
            set rhs [string range $payload_and_le [expr {$rsplit + 1}] end]
            if {[dict get $first is_truncated]} {
                #truncated on both sides - 'truncated' must be re-cut so it excludes the rhs as well
                set lsplit [expr {$chunkstart - $line_start}]
                dict set first truncated [string range $payload_and_le $lsplit $rsplit]
                dict set first truncatedside [list left right]
                dict set first truncatedright $rhs
            } else {
                #rhs truncation only
                dict set first is_truncated 1
                dict set first truncatedside [list right]
                dict set first truncated [string range $payload_and_le 0 $rsplit]
                dict set first truncatedright $rhs
            }
        }
        #no middle or last to append
        lappend infolist $first
    } else {
        set end_info [dict get $o_linemap $end_lineindex]
        set last [dict create lineindex $end_lineindex {*}$end_info payload [lindex $o_payloadlist $end_lineindex]]
        if {$opt_show_truncated} {
            if {$chunkend < [dict get $end_info end]} {
                #there is rhs truncation - and last line in range is a different line to first one
                dict set last is_truncated 1
                set payload_and_le "[lindex $o_payloadlist $end_lineindex][dict get $le_map [dict get $end_info le]]"
                set rsplit [expr {$chunkend - [dict get $end_info start]}]
                dict set last truncated [string range $payload_and_le 0 $rsplit]
                dict set last truncatedside [list right]
                dict set last truncatedright [string range $payload_and_le [expr {$rsplit + 1}] end]
            } else {
                dict set last is_truncated 0
            }
        }
        lappend infolist $first {*}$middle_list $last
    }
    ###########################

    return $infolist
}
||||||
|
|
||||||
|
method chunk_le_counts {chunkstart chunkend} {
    # Tally the line-ending types of every line touched by the chunk range.
    # Returns a dict with keys lf, crlf and unterminated (lines whose le is neither lf nor crlf).
    set tally [dict create lf 0 crlf 0 unterminated 0]
    foreach info [my chunkrange_to_lineinfolist $chunkstart $chunkend] {
        switch -exact -- [dict get $info le] {
            lf      {dict incr tally lf}
            crlf    {dict incr tally crlf}
            default {dict incr tally unterminated}
        }
    }
    return $tally
}
||||||
|
|
||||||
|
#todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk |
||||||
|
method append_chunk {rawchunk} {
    # Placeholder: appending further raw data is not supported yet.
    # Always raises an error; rawchunk is ignored.
    error "sorry - unimplemented"
}
||||||
|
|
||||||
|
method numeric_linerange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]]
    #[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data
    #[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices

    # The highest valid line index is one less than the number of entries in the line map.
    set highest_line [expr {[dict size $o_linemap] - 1}]
    return [my normalize_indices $startidx $endidx $highest_line]
}
||||||
|
method numeric_chunkrange {startidx endidx} {
    #*** !doctools
    #[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]]
    #[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data

    # The highest valid chunk offset is one less than the length of the raw chunk.
    set highest_offset [expr {[string length $o_chunk] - 1}]
    return [my normalize_indices $startidx $endidx $highest_offset]
}
||||||
|
method normalize_indices {startidx endidx max} {
    #*** !doctools
    #[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]]
    #[para]A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max
    #[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted
    #[para]startidx higher than endidx is allowed
    #[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max

    # Returns a 2 element list of canonical decimal integers {start end}.
    # Accepted forms (after removing underscores, so Tcl 8.7+ style 1_000 is allowed):
    #   N  +N  -N  end  N+M  N-M  end+M  end-M
    # Like lrange etc we don't accept arbitrarily complex expressions, and nothing from the
    # caller is ever passed to expr unvalidated - indices may come from external sources.
    # Digit strings are read as decimal (leading zeros are not octal) so the result can be
    # used directly as a key/index into the line data.
    set index_re {^\s*(end|[+-]?[0-9]+)\s*(?:([+-])\s*([0-9]+))?\s*$}
    set resolved [list]
    foreach which {start end} supplied [list $startidx $endidx] {
        set cleaned [string map [list _ ""] $supplied]
        if {![regexp $index_re $cleaned -> base op operand]} {
            error "Bad $which index '$supplied'. Expected an integer, 'end', or a simple expression such as end-1 or 4+2"
        }
        if {$base eq "end"} {
            set value $max
        } else {
            scan $base %lld value
        }
        if {$op ne ""} {
            scan $operand %lld operand
            if {$op eq "+"} {
                #end+x will just result in an out of bounds error below - as desired
                set value [expr {$value + $operand}]
            } else {
                set value [expr {$value - $operand}]
            }
        }
        #Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices.
        #show the supplied index and what it was mapped to in the error message.
        if {$value < 0 || $value > $max} {
            error "Bad $which index '$supplied'. $value out of bounds 0 - $max"
        }
        lappend resolved $value
    }
    return $resolved
}
||||||
|
|
||||||
|
method regenerate_lines {} {
    #*** !doctools
    #[call class::textinfo [method regenerate_lines]]
    #[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex
    #[para]This is called automatically by the Constructor during object creation
    #[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered.
    #[para]TODO - review whether such manual control will be necessary/desirable

    #we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none

    # Each line-ending is first replaced with its single substitution char.
    # The data is then split on the lf placeholder, and each resulting segment on the crlf placeholder.
    set o_payloadlist [list]
    set o_linemap [dict create]
    set normalised_data [string map [list \r\n $o_CRLF_C \n $o_LF_C] $o_chunk]
    set lf_segments [split $normalised_data $o_LF_C]
    set last_segment [expr {[llength $lf_segments] - 1}]
    set le_sizes [dict create lf 1 crlf 2 none 0]

    set idx 0
    set filedata_offset 0
    set segment_number 0
    foreach segment $lf_segments {
        set parts [split $segment $o_CRLF_C]
        if {[llength $parts] == 0} {
            #split of an empty string gives an empty list - but an empty segment is still a (blank) line candidate
            set parts [list ""]
        }
        set last_part [expr {[llength $parts] - 1}]
        set part_number 0
        foreach payload $parts {
            if {$part_number < $last_part} {
                #followed by a crlf placeholder within this segment
                set le crlf
            } elseif {$segment_number < $last_segment} {
                #final part of a segment that is followed by an lf placeholder.
                #This is an lf-terminated line whether or not the payload is empty.
                set le lf
            } elseif {$payload ne ""} {
                #trailing data at the very end of the chunk with no line-ending
                set le none
            } else {
                #empty space after last line-ending
                #not really a line - we get here from splitting on our replacement chars
                #An editor might display this pseudo-line with a line number - but we won't treat it as one here
                break
            }
            set payloadlen [string length $payload]
            #we include line-ending in byte count for a line.
            set linelen [expr {$payloadlen + [dict get $le_sizes $le]}]
            lappend o_payloadlist $payload
            dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen - 1}]]
            incr filedata_offset $linelen
            incr idx
            incr part_number
        }
        incr segment_number
    }
    return
}
||||||
|
method regenerate_chunk {} {
    # Intentionally empty stub: no implementation exists yet, so calling this does nothing.
}
||||||
|
|
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[list_end] |
||||||
|
} |
||||||
|
#*** !doctools |
||||||
|
#[list_end] [comment {--- end class enumeration ---}] |
||||||
|
} |
||||||
|
} |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
# Base namespace |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
namespace eval punk::fileline {
    # Base namespace of the module: exports every command defined directly in it.
    # No core API procs are defined yet - the doctools markers below delimit
    # the (currently empty) definitions list in the generated documentation.
    namespace export *
    #variable xyz

    #*** !doctools
    #[subsection {Namespace punk::fileline}]
    #[para] Core API functions for punk::fileline
    #[list_begin definitions]

    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::fileline ---}]
}
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
# Secondary API namespace |
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
namespace eval punk::fileline::lib {
    namespace export *
    namespace path [namespace parent]
    #*** !doctools
    #[subsection {Namespace punk::fileline::lib}]
    #[para] Secondary functions that are part of the API
    #[list_begin definitions]



    proc range_spans_chunk_boundaries {start end chunksize} {
        #*** !doctools
        #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]]
        #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.
        #[list_begin arguments]
        # [arg_def integer start]
        # [para] zero-based start index of range
        # [arg_def integer end]
        # [para] zero-based end index of range
        # [arg_def integer chunksize]
        # [para] Number of bytes/characters in chunk - must be greater than zero
        #[list_end]
        #[para]returns a dict with the keys is_span and boundaries
        #[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize
        #[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize
        #[para]e.g
        #[example_begin]
        # range_spans_chunk_boundaries 10 1750 512
        # is_span 1 boundaries {512 1024 1536}
        #[example_end]
        #[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75

        #A zero chunksize would raise an opaque divide-by-zero below, and a negative one makes
        #the pure-Tcl implementation loop forever - so reject both here with a clear message.
        if {$chunksize < 1} {
            error "range_spans_chunk_boundaries chunksize must be a positive integer. Received '$chunksize'"
        }

        #The open-ended requirement '8.7-' is satisfied by 8.7 and by any later version including 9.x.
        #(a plain 'package require Tcl 8.7' is restricted to the 8.x major version and so fails on Tcl 9)
        if {![package vsatisfies [package provide Tcl] 8.7-]} {
            #only one implementation available for older Tcl
            tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize
        }

        #approximate number of boundaries - the loop based implementation wins when this is small
        if {(($end - $start) / $chunksize) < 75} {
            tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize
        } else {
            tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize
        }
    }



    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}]
}
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
#*** !doctools |
||||||
|
#[section Internal] |
||||||
|
namespace eval punk::fileline::system {
    #*** !doctools
    #[subsection {Namespace punk::fileline::system}]
    #[para] Internal functions that are not part of the API


    #for 8.7+ using lseq
    #much faster when resultant boundary size is large
    #
    #start, end  - zero-based offsets of the range
    #chunksize   - distance between boundaries (callers are expected to pass a positive integer)
    #returns a dict: is_span 0|1  boundaries <list of multiples of chunksize within start..end>
    proc _range_spans_chunk_boundaries_lseq {start end chunksize} {
        #round start up to the first multiple of chunksize at or after it
        set smod [expr {$start % $chunksize}]
        if {$smod != 0} {
            set start [expr {$start + ($chunksize - $smod)}]
        }
        #Checked whether or not start needed aligning - so that an empty/reversed range yields
        #no boundaries (as in the pure Tcl variant) without depending on how lseq treats
        #a step that points away from the end value.
        if {$start > $end} {
            return [list is_span 0 boundaries {}]
        }
        set boundaries [lseq $start to $end $chunksize]
        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
    }

    #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case)
    #gets very slow (comparatively) with large resultsets
    #
    #same arguments and return value as _range_spans_chunk_boundaries_lseq
    proc _range_spans_chunk_boundaries_tcl {start end chunksize} {
        #round start up to the first multiple of chunksize at or after it
        set smod [expr {$start % $chunksize}]
        if {$smod != 0} {
            set start [expr {$start + ($chunksize - $smod)}]
        }
        set boundaries [list]
        for {set b $start} {$b <= $end} {incr b $chunksize} {
            lappend boundaries $b
        }
        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
    }

    #Developer aid: print timings (via the time command) for the public entry point and each implementation.
    #repeat - iteration count handed to 'time' (default 1)
    proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} {
        puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]"
        puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]"
        #'8.7-' is open-ended so the lseq timing is also reported on Tcl 9+
        #(a plain 'package require Tcl 8.7' fails there as it only accepts the 8.x major version)
        if {[package vsatisfies [package provide Tcl] 8.7-]} {
            puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]"
        }
    }
}
||||||
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||||
|
## Ready |
||||||
|
# Register the package with the interpreter.
# The result of 'namespace eval' is the result of the last command in its script
# (the 'set version' call), so the bracketed expression both stores the package
# name/version in namespace variables and supplies the version argument to 'package provide'.
# NOTE(review): 999999.0a1.0 looks like a development placeholder that build tooling
# substitutes with the real version - confirm before changing the form of these lines.
package provide punk::fileline [namespace eval punk::fileline {
    variable pkg punk::fileline
    variable version
    set version 999999.0a1.0
}]
# End evaluation of the file here; only the closing doctools comment follows.
return
||||||
|
|
||||||
|
#*** !doctools |
||||||
|
#[manpage_end] |
||||||
|
|
@ -0,0 +1,3 @@ |
|||||||
|
0.1.0 |
||||||
|
#First line must be a semantic version number |
||||||
|
#all other lines are ignored. |
Loading…
Reference in new issue