From e72212c394dd970663e53ca003278022039f94e8 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Fri, 13 Sep 2024 02:16:31 +1000 Subject: [PATCH] tomlish quoted keys/quoted values fixes --- src/vendormodules/test/tomlish-1.1.1.tm | Bin 24693 -> 42592 bytes src/vendormodules/tomlish-1.1.1.tm | 1712 ++++++++++++++++------- 2 files changed, 1241 insertions(+), 471 deletions(-) diff --git a/src/vendormodules/test/tomlish-1.1.1.tm b/src/vendormodules/test/tomlish-1.1.1.tm index 4ea2ce3d5c130888c6d7d9839df23f375bc5cbbd..ae68fad9eaf8b9512c77dea4291c3bdf3329a5d7 100644 GIT binary patch delta 18094 zcmZ`=1yt6{)26$-yBnmtySuwXLK@_yL8MHI@K)MmW7rfpp_x?Z5 zS> zWtDYlw1C+AsDwCLDu#9o29j-fBekFpv9(1}l4CuYR0h*c!hXaZdUwkL>mPw1Aq`ba zLK%W1_mI>6#p|*c6lzL@S2`F}C91z|V6yx+$AA zh4YrLRF9h~6R~%VJm30DC_PW*GbMS2Wr4;IL(Odc?fhpV3xN!mKO@pr^+yCg7L3W1 z;ZRDqeA(4(*L_>-wTSt4@cWuK4ZPvU+*lR@bu}*PHrt$r#1Y`kC@?jLMcy?4Zw#2^ z3J0ZJ%g><;o94XfHZ*UKiHYJ@;Sm`U3T20Rc6er8w&M->i0$S4RLoZwy;S2tUOb8Sab%kUn zP8zObZP;}l-^bQxkHr>c#h0|t$-Da%u(sRGontOHBD#KB%wq`F-OaF8pQ~k7#T0#B5Z8$z4`KjZsD66qfr*1rMW>EmaFL9^6>rj31DxW*dJkI zX!}d|8Lcj{*T(ySXWv9?i*kUUbKq}go%M_v73vJN{P%0G>^&BnL{<;rU!eXJk`b9` zkWG$;>OXzkH_^Y!#vKt4FK3HR!1hH^Iy96Z37!2Yesa>Rs*<8!4}FfJQ->ngi@=we zz+A<-WPEHqqn4Ce%W&_5KRdu~{X4NveMds$!mB)5+~F`y!Aivm>7k_9j4&r&XTtFc zlx=PRiK(AAwK=VzGG!i}Xn3iF$sBe_D5Ju&AZdxTRrxdu9aOY+Lwk7+l?eWWE0|VR zuws|^Z`EI;J~6`>XrkN>?8gjx!QG*fH|T_fI2aK- z==$4P(3P_T8ivKzTPQ{+7YJrNYcRoHW4>Z z5ygRb*o4Z1)uLCHWK#l8z-K6OWF&?q#WN(^>8xSprG)DF)(qi;ccL*mC_fKl$(_Wm zS1pfxjxNrs45*lWbZ$MzM?ytS(6K^{({_o){+#_D+VnF($H@3pY!>p%hCqI# z@xT%piZVCnG@cipLlWoTxvueR+N*DhnJ*g{ z+Mmuda@)QBL}Q+Nfp@s{VOc%f(gv@P*pEd|NnHB&btvII>qamByr1Mg`?sVv;i>Pf zWCQixF$aT}!_fzWte32*M?)uOY}-tlcX5sE;G?i2?pkj1dOGo1d3>op)fnU+1c1VH*>td2L}VIA;m|AM27@H<#;^|13_`F9uW+T z3z)$64Cq1x4am?|a9-y|6IN(7XZYe$U5r6hBpxBeGT#Xy)g**v%#}?&peFH3KXHHt z?si$=Sw2Y}CTAJ3E&^Yiv-M|uw=*ey*dNBc+hJ8o^{_<-&>ngDVTB z1}yX1`Kif!j`ttD-x*!+R&MfB<*G=5G5JP$?QC!Rj#dv`jvR}lKa(~N>XIXNY%OF~ z0qVlZHX2HY;dFhiL4y21MHm6vNBK*@Ds}AUQsMqb*V+TFlUoTQA%uy@4DqER;}m6W zo_MO)8|r}ivLAh_mIa7!%@XLM@+6QhcVGvTJ(4+LFbvESjiyqG>OzSQliAwpcI=S@ zn&=W=3}sY~?HgtmknGI3>&p z7zsfFj$uCfneBt`2BzfG9pqSlW(H$>se;y7A^m2YWF(5n!g_;A0b;$sWs9|D&UXL) zEI9R5q`p}JnncY34m9uwCc~|GepY9TvW!d4^Xb*W++kBjUL<36vCL~HHoQ_xqS4~)fexD(H^ z<~|KgPrW%Cxw?Mw`A&%bR<)CfT_-W_;O=s9ZOo$UBq=eBS_Cf4B3MZkc)yQ$PbVUq z2Y8j?IlJ2N#kn^l^hL{hujEm}Dzm4~(n@2<#LJ)^Rnc&GVqe+L?U9CwI-D5u54E{P ztD)tlL*di+VJu-qC%Wt~7=tc9-h8AcHy;SpU{F1c)^~)9i2Y#|8LX7HrdZwMs>^fJ zt_s1BI&#}SHK$TR(Bs>VLn|*uu!WOgodSp>=6uZ;>&)+k}CGl<-7R;EI4787-MIk*INKS ztE;RH2lu-iy5I~i=wKl=g=f+ot@5XR<@U|j`9a7fn=f7M%d|Kq_i)BL!q3+`kKOdCS)C>09*z?ea=tkZ@|1QZV4Kur+@Us+$s z`jBWMdsta4CV9$?I8N>9S&gJe*7XEvSr}PNL>WeX&cwHjw$vu~b&AWuh*bZEf7AuX zSq&hPDu3_YQptqhauuhY^xn(vyB0&d;YGb4kC&~Q`NehB%Q+$3jcLD6FsGJcG`#N7 zM-!4xwHq+bDEgshqiO=L>3Z`#c6?Kn`P~{+N4RXmEk(K6m{j1=C2Jvld14(c&~Do| zVht~McX*YL4I3whH2|A<1kE+19}7|ks3D}~;N?8GOpx+;Sg1O^^no`oWq;yaUX_>! zg!LL|(!6>DF$&JH*l64OFK4Fn#6710Ir0G1lt9PtDoWl5ccT6Mt?%za`x`h!jTJc? zI64$)ZY13ZM85-l?jul>e)#0T6$(^7BADVaC4&i=)~61d>n`mGQpDXZQ;D}3+=!HkKUsb zGP|rOyC`^k37}Xpr&8(@3_^gjALk>jo6Oi%>~)5t9E|lmw(Z}#T`#vaq9G!f4 zxp)>lk!3jg$JLr6(n@$z0*PsDQVkZ9_iBA}p$UZhi7YjZqd4Ze!cQGkx4>MX_Fxm#jQ_L? z_j^(RNvO?{p+V!pKR2AY2f^5YG1LZt%r&ZT7I6IWP>feO8dEI_O-6_4O!%Z7y=@CK zsYrIo+Es!}7rVpDdU>kg$XFg)Hs9lury|}f-yG*p>f}N)0mMGGGvy~mz54l6(zy2p za$%NWXCUICDO5gf(rPt=OL50({2=rKNfbw;_ z$TK42)s9lg1DCoWAGGi}4M)5Mq}lVHG2lHruS#ym$r7}rd*G&XBh||TQ+VsWJqTuH zDDO?5lhKAf3dgaZc`Z2INVNvIWIaYCh8=GUMppNw7do#4d%M=RO)(EV5PJzH%G9|E z7-HZ0!ndX(_{e<*|Lr|YIIstw|sQEWJZL1s#Vw zD-`-mCS(D&8l#lzM<(A}bI*ez@8-5EK9ZkP-(5>izC*!li&GAUFeg&7T1VK0geU1| z-+nDinJ;k3JG8RGIg%)bYA1vIY?spBa#j?&00#+!B4toSHHVoOR`BNF7Dr=nkOg*r zlrPI7f7P#!(sGLrUlzuWf|{)l@O#L=MvPEpKa~jT?IaMKMHt80(b>%Xg$es(;QxEr zzjB}rpHccOZQjCPn|;>P>jq%wca+D5`R9cJfB`pO?j^;XsmxL}taRk-rv+(&rlm%rlYu$d_%FMK}`o%-U@z z%C)cH5>>>iYg}UJU+})-M0!;@f4=Ayg^(~}3MkX{{mMZSCmQWl(V;Gayk~FgptckL z`q-U<#-$1UwP10T&DUnBBDJ>~wcOk&=9J3q@gAcU8QkBQf}&YS)S}h~+U&ib+w~ax zTYRoS`GWK*Aj!keFf3!AUx3{Ymg^vc81^3B*)-Zm@{M#4 zhiiZx&agLRLRp-%B9xYKE-ItuK`sk!`y@v;ze0^^jQ|U|cD~zrwqig5UJ@QFZj5MmucWRg?HB9dWxx^bd%MIE`v(+X<5AJtfNl%DYaw^$hFjG+$MZbyO*flptg4wQk(Kzs5y=aI2oYD$>BRyyuu(rkuR(5C zGlh#bi%*jn4ba~0I$K{0sRwHoqhCTN{@6LWK1UqO-uba9{eybNh`+}l@OQBJwer#p zo1r>E6Fmns(bXPfO(s41-(k9@oPHAGF-~(6;2URy7Mvn5j!75b1qf@)W1l^K)>)8c zF8_`^DPJKxAixQ_nsdsHR0fWD0i`0v$p!)yIh&0WgO1@l3;8Q+OAHNuhDn)`8;iYZ zW4saa!i2)iy|>LF&O(tFY#90jQV8KfueE7ut=hVX#LaNsl&X?6m#Rq2?sA}RUcb+d zUVk2hZ%s1HPi+Lv?cm$`TwY{(p)|?(O__)2o0(JJd$<<=b;PG(B?cv^qyW@uOVGkq z_$v@9>mvR$ycs59a1y<{k1;3HyQBv>tQje#ZWHL#+q0!z@_AF{A!1J|~>`MEn~; zfY8pnUP{@8*fhnyt(q&Q;!#Y&K-run#_u__>{(WaRz$v2=49TvUs8nTh75H%R%Y{a z0gQ&&FFZey)60qX;P;3-csMf5255$_l#2I8bEv|I%dCWGY`RM|+@P-3o5Bf_+n9H~ z^`>{An7w0dlHRgw7dr~~+=n4xWkbmnKSt^EK{*zT$+Qwotzw;xbA6BG=?jM|>lrPq zICBAJ(_5V_3guZt`~d}TUx48Y|>sm2=TCSv6IFjU=OqWwXlYW}O%Lar{3 zgE0Q-2RC8Sg}e(gJ?pHtt8iprbN|BY4#!4`#**XX6Moi{#2nb&V4Dl{X(=_PHNC!v9he3LW9e7v${D4Eq zx^E(d*x&v9tW&L#Zi~T*gyIGgqT!qa`OYOGJlspj$5@XqrG9`_$FTUOIO#=}ocWna ztV$bsH2s)8kGhh+Q>&%wQ%hHMz(PbzybjDYZcFGcv6r@;i2~G=9v6xT3y#>Y zy;x&dZe6}sOJ03#OGWT@)oTzuxnZgVm~iv3MO2=7JH@M`dVpXTj}RT@N<*E%rzc5=1j$4mco0};sn!{=^H9) z^SMrz?4BDC4?D*2qUe`Lcv?FlyzYAcewg>mR}D0mKp;)92zVZ1(wBnVUcn4tsqrae zXZy`k^c4>791!UW)iyd`Rf1-{2ZL$%{Wau2EBc}H#j1k48KBM&gPJmEsNyJ3PYJ+r z*7CuY~m0xoLIL9WB z3=8Tq{`a7+6cCmO$i}V(5PCR)L9etFRk{V!{3P{_MQJm|(9BiOHQ%~p=2j1n^+y%5 zh*UuJ+D3Y*%>?e>euSNBSWn3!5d#so*^;#4-$CbJj0BU#YXd7^JRK{uV_4H0>Vq^9 zo@yJ!JjzGg_}q=mlfec%r=6>rv$W!Bw_*AaJpue@HHS+&e0l=g9u4p$-xmAk6gboxBl|l1U92j5#^T+ijggqbq0cV1ZWKImi5rUf3 z;{=V@hm+!bbjIUdS^=~qG$Yj*(KS<(6PuW4Qj7J{IMn;bWx3xg_Aru z3DlGyBDI0^-z%a9z`+6j=t{9+xv2;c0txu}nG!$%-SPI2bwCX>+jgQ2283Ptd5t~gM)?lyo6uK!a22O3d19~uNrpbG4QF#jr`Mj9J z_Q^4HN*w)6p876-8VT{?=*Bl7f%WsQ~c$-onIn9TIGY};A$9KBnR6$1p z`L?+QY3~at71IVv#R!4ECp|Ef3m1@~^LO6(eV+?PDF&4c@p*%75Dwiuk-qm*V9K^G zVbo-zCb!ti{pV*A-^~P;U_2o7@B`Afc!)Oll=)EuXa(InK1quN@{|QT<>iM+cYfq* zDQ|K5bVG*uNxFH`)Qa50%6^{GaMrqDH+d`OSWnZq1q~bc02WzD>?wX2>>;$mh6#@DRJ@cTRlw(o58Aah2s)%#OqUVl53 zO3Pf@Cpe9R*eCv;cUG|famU}HR)Cea@-?*}{FDL`lnZ*a^p+9r2kV7w^d5gOi#e?t zYG&puG|v$30P!Mv=_3Fg2^GyZhh<$&eR?-CB`$s(^CVl8fy6Gc!!()+`w7n;Uwu@8 zFUK58nW0Wy(yL!>I5(_bLh5m2>J&P~&`7-k3Tuau%O%p>V3AVM#vKR*$$1DB_`g4L z{ixA~>Xh?c8lsdWX-+f;S${XFEfm+of*dtx1v4jRA$dcZ_Z1^x=E}0-FjvjRM=I4L zjmDjN+?@}X_*hv zjD4Hn7?Yek4^5E^(CSkmwlt+z{90w%#5 zU|yLD(5*@ST+ffcE@W!H)IiCBVVZp^*TiDYg;aLTbxR_Sbvz`5#jw zOK1GG*VtRT0YBhaSXCxjeG;K$QpP4Z5C?OLDz%&2T+-YyzqLnL@>>`ztBxjfOi^m( zU<*Tu_q{M~X#>Z=sqS9x&Sl0qTUy(4J2Ggzd}a_4LKsJ+?0)0w=0r1-L8xV_jcChFt0fM6KvKUFP zK^6NM;&+*Xz9wz=RkVn(`)WMc&fsJgWuKZ%;>79N=E1#?0ZoNEceKeyEOp2^~K*&Y?YYTM90_~tp*$w;JG@9v71atds3r;t^% zI}%TdT<`^L4iV;ywD7|o+DgxX%{CYG6p`I@h<4Vbv0bNg7OqZaTpzgs8ZIoU$&+%? z_(cP|2-!|0K=qu~J0_$B2(lK9#WLiUFEvwb;%v$MkWRJ(wo>9h#KHe0 z&uVhG)}S8arY*m#+d1fJwDhaqMok7}Gl2nq;>UW(1MF)nI4p=hmfcieA~dWJ8M)cE z*XAfA#Msf-u{-Sd8CQ=p%qMH;Y`1zz$H}TmxROPv2UpG+4f&tkT5l9lG-C^dA|1FT zlQ*ae)DB-@qUROV!+E0-6lw^UqarNtetpl&^>+Q@RD#*u%;>C0;yCKslmI__8f<~dWWt5}TM&U04B$e3=Ol7ZY< z^zvx0pvs6Ihy|HAF?sNd8{z&ts_knmJzWV(ctA=YguI@QN7H>48`-@g=NMj?bSXi) zjmZlzy9^Sz?PeI4?%O^KD`oz?f~dEE>WW>((&VMiYCXjwHS0Cx15?R12b@U?rG#|n zadvyl8(sqj9n`M3Fu8msrsIoq2(=591P7W>(5*R>QxR6g=0?y!+Hos}rP{M?Wx6SG zgL2dYgpb{rXZcYSg9P10=0pq^A;~SWUfXa??r{>t+02Zu3j5xE^532T+qy5 z$dW?oChryhOqb&#QLUkaLv&ISn=3DAuvNtcaSG&KL<=UyP-!}C#L|~%<%zRtB?;LC2DOR8WB;};TDrCiLQ;RsQObJy8we{HO2!<6Ro zk3Jr$NatIp^_yc7Bg=&&(wimPIb?5F$-eLC@&Y%JIC3JHD8L6ffMbFkP_^iZJ@N9z z?fEVppO{dMGFK6Ya%SjH)iol8jMNJ{Upnp7mT0!TJ|8D*G>nN^1lv&ZWKVwqTEJtahg!9g4U1 zR;8)GhQpvtR!zKeD?WZsdU=Bs9F>-6I9*UY-?6K^bH?b!@yvw&l2rY$3#Ni(*(f1L32NM+^XK3C{#8K-k_&^bLm+|XgwKFGP){Kdz`)r57ML+c z`BnPK&{lF@}XGqy*a!N z#u$+LPFc9!X}-f>hbs6gv@#5^%M87VoV`M)5wOp)K4!3=B{TVTY{OE=)#KVr9>fXcy_hgQ>d}7HYn>OUs?CRbXfG#|b6X;Wu*c@1 zW;*d{FFVt;KTT{pj52;ToRMN8=V;3{P#0yPxh2FJn~AnpfS2Uo*6J$&+y`H`?zHGh z-Mv>_H2;>-i$9vDKFl_2#7h08Hnc+!OQtZ>YFr;llHIP{%{iVq97ykM-0s{!3Ynn9 z`EeD8chenrI?6`&<><5SHgbbc7KcK7OAHgjsyHxIS~NdVo3GKoH>hgfPfn@oEX{nM zDZFkexnroy*{I$NhBWaH09B|tZa-- zbaa;FU9rz7FuVbO?Yg|K*)>~@LCJ~%K9E3qGkp!jMnnsF%ercCw!iGXY60M$C}s0zM8FFo zJ0w1URvEmlP1}eA+HV@9aq(`&TN8Lo?y)G+LM;rZ2?#1{+kcfTpwq+3AgWlcvont@ zBcjGAUcKIw+}iv&+**Tzc>r;c$V9ugs`^$X(!H4pLvSzD&G7XcdcK=`aXX<7-6VBd zNwJjfU7>;-6e(Ta%sarE5!=oQm_kAu4&E`vTJsn5(yf--e$3B|?6KY}MG_wcmxuYX zGK`hVrWM~$5TXu5vT&mM$u}xHarOpT?(jxy`M#nFd-e(bEE%qVB+@@O(zjeo-FENO zo6#kiKz@d>=QrUUC+P`H$|qB13uiy}nqy4RX!|&XmyZmsU=Qoal=Qn@zTiMGrVS1cXZ^foVOS=_ukQEWeo(bwD1Yo zt5#cXQBAV%32N`BDynu7pGC|1rWJA!650X|dXo0-sNM!EvTPH?c=Na~Wpq*JHpw{5 zBf%pTbmiE#$2>=z-cIRxV`x+I-Dr6;wbWNrWYT};nR$R z`xg_e;GCedx9QMw^GUl=IShU_E%LcbwLK+xK6MHI)Z4tqTH?eY2Tp<7Lq_2tqLcpN zZ4L=0@W)6_mt?QV4f-q^pxguN<7de>K?3Z*HE{qjKHIX&RM@v6_OabuD#8VmFnn|5 zP|`;8B&ACW5?U-<)N4Obm5+HH4BA_LiCTZLSPS3Hn#d*a?8YWwe6UL z8A?E^@`~C1>P;objA?1=3T-@`-rJF4r^4;EALELvy|8vgK2||LWre_Vw&D>OqnQC8 zy0oc-2c+bgNkho!B;6E#WOp^ms{@s=7NCQ0o^eh_1zw1^yKIMVK)p7tv`?;~U}di7 zN$FfwX(E_8!DBJQ#5DqOf?lV1xkyaZp(RM2>|XD7U(HS7>KbMS2#7QZ zLMKnPL^$hI2KoZsE7cHQ9z{0oRv>?o9$e*rjqFv=yXBRuVnO+%Z3}+=;~A0qx-BbC zw;V4DD%fYDwwb_#`U#Qy%PS%%KziHS_v?$AFD7P&fQWb4&ulT&Oqo0sO-%2|E*kBy zMC~axLZ$6kqf!(5!q0*dy~O(|s?QKb`)hP9alYW$gk`YWxguzo8PSW*wbpuRDXTY2 zD7}r_9!ukzAd0zb$B_LNf)jX5nG7*xp_c135Fwa%`T?0HIr&0X8r$Yu4}emYwplqJ zC>NG>UU=BHa$|Pb1a%HH1ooN_h%eZ)W=<+PEng+*J&jZ_$utDn5Lar7KbA9JBn+rsB! zBv?rCU_x%jaQ^WQcc&NV0zh$>c^5DG%Fa9Rz{m-Hx=Nox$g}3?7Ct~hD9T}m7`maN zF>Epbqr$zWOx6M=Sgk|R2XXCt^W=eK!`kT?HEzJ^%79{ynH{o%!y$BW1*((=M`1uK zL_vHwqAap()+)w$R9qPtJuf_Nnmx)hJERXZ3|?Maru90Z#^AUHqyT-Kumk~CeO#&4 zXZp-(Jg+MIYeS}$E0l(SpV)w71cM~qa?jV#(-}R102uR%tO3qx%XMw0b697Gl%<&? zsd!=Hs1OTMPXDF4jy&vJXLLQx9Z0Sa(+R#FZre|OTCvBN!ubIZVVrCQNky`W->l?9 zR8KM>hsOl>IG6WSJ@wXK$<%tZ^1)r}maTS;Nju$J#0yK3Z+wN6b%q&dHDP5@X03Qd zf*VPEc|>e@lBF)T^s|XUDh#IypbHK7Bhma2&+SexpNoScOdu#D$@@FP{1J+BBo(31 zF>x97NOY=jd|<%9Hb5k%Ux!K(IRd~tC+7ujv_KJq*0pS`S;Q0wVm3#%c+_q*$M!^b z+Zt>MN4Q$a1xd?s^b(rP)%(xuud(Dk(`P->OAAE$(0v4>dbhk~^!ZdZ8Q_Drp5aA7 zOr{27=p@wuUYQ4nXj)2|K<2SAbwTaY@EH&C04*-h!%M2@ux*8+;{p%yEhPcyB~ct& zR$}D1RGn!DxpZlXrwCJGVigMES`0%TO>}FP;?&dX*;%qEOXWUH8fa6N-UQS*~B1psV)wN`dYmV(9cU9@6NA*L=1V`~FGVB?qG;&a66xT#jG}{|lEA|hi z($CB_6XbV2SBuk;@`g$O`Yjru@rr*14f~CT_r@oFqDV(;eomqBF~WwV4X~Tk3-35c z7n-8 zF1;E37J((z5zY2OIji0#jC8F;1t%R&_#x_-0L!Lm#(Vv=2~_(R`j*6Z$35EIl1eZK+c2?LwHKr5ZrNl*3IUXio<*GtyEUGqVT(6 zp{|L}bLZ~|SBu2nU)}7}1JHPbuOePw&~CN1n;wCYdAX>*O{7=CQ*lw6Qg9?hV+4y< zNKva4H(1T(+bQuz=k;MvAM~@Y0%tH!jnB!I5~tPR`ryM3VUero-5Y6KysK_X-%?1F zhkxAJm@){Jd`lQcmxICy)+2~?aPFzEYNY|e-W5MUt)$Q`tmdfs9YE*61>u>yAIc$- zGBIQD0*}sL4okStR3LXQJJ~d|mL<;(eMFSzONZ?Y(GRK#2+T7u@z^terYXJ20krM! z3wG1KlRt6>OV!f98H^U9e@OG&c9JJVK@PDY`%!?dgG+q%_IgWfo1M44*DUf)h}u^( zohi;MU?sk;HTLe&LLR^y=2A&33YrhUH>esnWsK*lB<%T5QGSE9(v;4(XH%tYaGxuzI* z8I;2Fd5e)8D^yFe!HXzV{EY8AkEthJY^ePEkfJW6JCeu@|DNhl3qp&ku8dGF{cDs) z^!zVz<}0%>$Zy=1R@Jcmt0Fn0yt4~2%lN}}VPjgyCTp6zVAmB?+2Ab(TiuxCq|{a< zzSYR6G-M7{D*5*S)+dr-aaumU>sFqG`2e1j={zu%sm8zEjrS5#ek5|d;$V0BehHSEzbAzr1q1LCr(H5en6RJ$}1PnVHKAt1{;SyM=CJHzO;t%f6Y z`{=b`igl|}%vs@peHY>U^l;BHj(LfM%;%TYFt@R#IQLrXQ;6T4EGB%fJ~q!E zh%A3@Wz(>knU`OvZq?-a32Rj7qg5LE|kTJkv`pj&S18p%rzLY=wPC3G3Lb{SSvYm)H6DZ;ijY zzt!0|yV=MEB*n+diK$0hXNduvaU-1-&vb&{IB(6gkKVAKqCWiVPK4A#DiOius}g?4 zhj$G~b`Sy#h4*47#_$qWC%GhV@0H14zmHbt;XMs)LueGNOn`fyab$E3x#6d$y!qO6 zdkNn+qaZFg=R*8ctnx&Mh*YQRQ`kM~DgIDN?j1$zsLcHzOpk{hZ730vq!}6vY=9Yb zm3h3OkOE2NFaduPtp4I@?x!6%taIb_n$(ZpueJ-#qHeb%!%&Xm_R<;XEfCH)_S#0N zITfIL#Dn1ZQz~4Qm;MaU%ma zK{7hD3ebC-Ol*9~(?Y^5=J;lX_}V1}|D7W5*Fe%C0cXcu1|1#V-5}DuW?Yjs!F?43 z$soZ9(vp&&OyO*K@+GT&*C6lMUG5~!pIfQ6dvE_lntPMwjob3 zXXcLWf%h4&Q-|^}E)Q^b=M8~`V&?hv-rxV6&~Rub&?X3Z2L9clZXX`$m(!%@x$8)x zvdroz!`pLY=wgtVLju8dso8UFgFKg=7rdD)23a~L&R)GeM3$;l}XEP zuiDZ36BC&f`H4SM+2EBdU=+sdvKdVpq zzQWnsT}9&(FW|hNM(-R6=~YX$PusUmyfrv>EHAp}B2ijnW(@}3d>PIK4m@DmShb#`|g2k1$yU*Hnp(%^YWh zBzPSStDaxzyqZdW$v<3+kx30CL>gzCk`k40T;~4erg07T<+W z);xt?SD8edBal5??ufPn!S{T;A35gQYmqc=s`?pHJN8-2?l5@7Spe-6oY8EpUn;58 zd}4VLm3Y8{rW+PMJ)gmM-Mt!Ec}2XZ+~`xxu454+yE5L)XLS<-*XbiO=c#tSTQo6| zS0kKjrDMa~0Cq3lP%FF0oqc{=O+KUz0ZqFIHXCS*%&bkVHN3ixCU&SZ7K(2PDh;Vw zsw|~RJB(s;`(V|K+5#Hi6tyE7=&n)lP_O#Wsnx!HZ)k|PSa#uvzvTHr@1s85N@hsC z*$fH~=iM}nkYg=bfQA?1wZok@tKtHTb;*P}=8rpq>cdK|WHo+U0D{P5-FOio<23Gz zO?A`V)zOP(mVAKdn;-f~n%YL?IR)3Q>(bTLTOz!NJ?P(FOu+y$7Wo@r5;kgW=~omA ze7bDfn>Y|lNyf7u-9h3Brck3VLi+09n)o5Ki5YKT{L9!oHEAoZNFU9WJ)h$|T^_ms z^VZwN{tDnSVV|>9{5w9hEfY1Dpz+)KP6e|{mKN0)bz%xDAu|{h?(%y{mqSUq(47_J z{aU$QbCJo>Q!UYeki6kfC}k~WWoTb|WfRr-*pMX>EHE=&{lJ3Gw+9O<@AG5^4H$gB^G#UXI$M@0Dppt&f@zKay72RlXJ&LL zOnRRI_I>c6>&z)Ak3|Cp^G7WVIK+VjR53+^`HNP(tBehW^7L6P5Jkg??CE2*U!-3We(ks57JBlI3m7ZSp^lOP)y2Fo2}_#Qx`7lYjT&$#an>S^^m#v^+d_0P5)?kv@#_>2rub(eS`S z-N%x!ckLzzf{GXElp|wAeoPCr8g2=xQ!y#M=6}2KKfJ~#8VHqtYvA}rbNqKtpVAorl+FHI z6y=xb?`;xzZomk^uJT(k(9)3naU6eh9YKSZ{8JOyGu1~hh*${RF#I*>C$z&SU?R2O zV1a+J5dYoxCp5$-I`Gv0(t-cjv%fiqAkfn&*6|yN{J*{XA2#9>Gj19nGjtE69AKXj z64`$#Jb$87b$^5Ze^XBgJbwyck7l0KEuR3%?H|p6%9#JZ znWx3fKgFY-gQVpjO*|>q{fPmCbOC{c|9|yQs&`K$BV8Xmmu-UgXyQo{{s|D#^DiLM z|2FfV%={DSJYSGB*+cdNSZadwXzFQpE?_jKhILLc;77%6JhH<}B4KC-k6f779TWq09vIGmJ#n z!~{jhu$fxLD3Tl*%Pcp|-lZjkAg^Jwg8~cStA}SN=@|$~-`FY1Y|d__S0V`V4z~3O z8jx}eXR|2A161Zd9Yz`~Ru`fW*0 zrg$Ner38kFJs&!h2R%K2%?_^I0u<-t!8tQAaK^k`+Di_u$>%ES2F$No6hRm;>8N#P zK+jT|9YyaK#3LVPgVSV+(*`Xi~2FAZ|_~C>c12LGZa>7$@DVURA(eA8ht8ct^Q4 zAUdLfisKo4HipA!v{R_HhI1jHm$4qSJE6gllOx_e0>f-|%SD&M87k0q_5cT6Y6#7( z1g6G)%3)L4)~fsUL-w>JJOeJxzugd8TaE4F-zirUUnmDpDxgT&QIbqEUk_U)xSAneDcA&Qfw9_^?BGaUAwmUS z+bd-0!!BFsXTKt-iZW#f#5tl!yTx?8K1^&e4}+&Ys?GIp6Ql}86IZfIBf=)ki2HwS zxz91Ul$w;XX11e)^fGOdx_9!){Q2IIJ;oi>iv#HvO<}hqqE$Ixf;ArJDw(=Gncrnr z-V|jhpZV-q|Iudi2Rq4H6(uv2Wln}<$b z_#JX#^w@jd6yJMGy_hMnPf2%o(s~Dcz=1QvufC*DzSmt5p6@r;ebk|}`?V@&UqZg? z2{W~(3nwn!Tys*b-!nv#qjyqzDJ8s=bXZcldVB5So?F;5hcI6fpfOlqb1=8bk5dAS zcLYYh*k+|%ztD3rOn%Dr_493H_3$8ij)?e&<1;C{GyLm4e~F)?FC{jks=W@VH~X7N z7X~R>j;)V=k?-Oa6ncm~|G2+_J+AR^z~7UC4SdXT^yuA7J8VuCzRn&1!ff}~?u)?iR$#ly%6QIX`Zgx5 zuv06$@R~PWT`WW@q>H{^Gcd5fgVA;4f?X5vlZlaQlfPlA{w8~n^_BHCl7MsdOCI9H z#C=paDyz`W?mXO>9%LnB*cnk0H=||YDRgMF+LT{`FHVYNe7~bc{r5ESQpu&APU!Z-NPNXlXrV6h_))%?_r15O&rinHG9*jm!}mfg#I87`jt3-)j^ICJ-z z55@#_O3gHaShqWWC`G&m*-me9D)tcf4zr7$>^(Z4fUW%V-fVcDrha?Qqt%T`&bzWz zvxw`W(#Gd&w;pl*Jox9UnWabNJ1bE;rzsvzig)uYim(?&`@)k%SRdo1dXP#V2P)%{ z#>hlej5*?J2$Lt2ys{q|%yobo@hYf0F~aoAcCl zAq`Hp+LoRAS}TI2PCaoOLKoFv(a3vz)i6#>`BsTrevsqXCGs?JMa)XHVv$ZwxN%9x zW~%+Ejd`UfXy+P=s*+fb>f=_sH2L|xuKc^n=1rY%>TGOhjPXLs)1I@k##ba_jZO#5 z6R$C2xB6D42FoAod^(hFujqI>C|Ez{ed60Iv6UmG&xYt;zx$$(-kmADpMQX3Ze~(4 z>$fen;meaiCFidTMV_ow1=THnB&=5Ps3!HufK_*v23ya%%49z44_0fe!*Jqt!}uvw z>lS6@(4L{K>Qa{;?5JqEVYC1F)!OhD>Zi98U0pLlD*c9yY?9A@eU&wuYDu`0b33%^ zN^Gmf4v8Vs#LSwGQ+0Rb_sOfh@g=(y+n{Z%@uE`?&ond(ZBH&(MO!TJ^iEK=x_(%7 z&ACnE_lj29mxY#QD5RWVy*-w@vBi7n)R3*%yjQ$J;#}mqi3_xs$@Q8GPfNWAcCcHL z{htsFR%aH{{XhK`DIw-PXyhm|*ZL-4j{2X4zY;j7hr!(9J;};{f>lS*+e^b1>Y9GP zl%hX$R-t9j>S)eEk=;HvZ4aJSMYr^gv}Lw>yx+EaYVT6ywb{YfHlAx){qJpF{T5G@ z*(e`^YONtXJt6qP%(K$A{OLFHc3-6td7H4x)dF?f=_3`Zl8rW;KJdIK^7g?m_Fo(TZG+kI z{Ac&HB;RpFem;hCM2wDC8!PmdIJ~fz;Ki>6AI+8EdYY67yxiGCX=D_EpJc#?jO`Qw zYJm^R&!3tipt%?Vg-jAffL?y8;FpLh6q<8k2E=FKD1t1rT%DoD6Zd3G>bf}z3Ckeg zo^w93fqJ{;Hb}lY&xi{<{W&_qMjE^>!%!Z$<>DxU+_T*H^fz(3kuqOEJ`jQcvwpb^ z5~sO-*&I9Dn~;o~!?;gZwOUz`ExFLPMEyh8Y}4Q>O! z!Ag(^FcFAIowfK#C(T>q_`wSmmH?E=vEGduWo!icv&-xv_h zTRI;DLdm~M*!a5v9*kAuD1vCX+`Y946jLE5s#65maJddiuz*69|Dz(PsK!wQ2$9d= z;o9#8MDVx>M-c!;t2*%{47xhWYyy+bN{~Y_{24?P%H item:'$item' tag:'$tag'" switch -exact -- $tag { - KEYVAL - QKEYVAL { + KEYVAL - QKEYVAL - SQKEYVAL { log::debug "--> processing $tag: $item" set key [lindex $item 1] + if {$tag eq "QKEYVAL"} { + set key [::tomlish::utils::unescape_string $key] + } #!todo - normalize key. (may be quoted/doublequoted) if {[dict exists $datastructure $key]} { @@ -286,10 +289,20 @@ namespace eval tomlish { } + #ensure empty tables are still represented in the datastructure + set subkey [list] + foreach k $key_hierarchy { + lappend subkey $k + if {![dict exists $datastructure {*}$subkey]} { + dict set datastructure {*}$subkey [list] + } else { + tomlish::log::notice "get_dict datastructure at subkey $subkey already had data: [dict get $datastructure {*}$subkey]" + } + } #We must do this after the key-collision test above! lappend tablenames_seen $tablename - + log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy : $key_hierarchy" log::debug ">>>>>>>>>>>>>>>>>>>>key_hierarchy_raw: $key_hierarchy_raw" @@ -298,8 +311,11 @@ namespace eval tomlish { foreach element [lrange $item 2 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { + KEYVAL - QKEYVAL - SQKEYVAL { set keyval_key [lindex $element 1] + if {$type eq "QKEYVAL"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } set keyval_dict [_get_keyval_value $element] dict set datastructure {*}$key_hierarchy $keyval_key $keyval_dict } @@ -307,7 +323,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" } } } @@ -320,16 +336,19 @@ namespace eval tomlish { foreach element [lrange $item 1 end] { set type [lindex $element 0] switch -exact -- $type { - KEYVAL - QKEYVAL { + KEYVAL - QKEYVAL - SQKEYVAL { set keyval_key [lindex $element 1] set keyval_dict [_get_keyval_value $element] + if {$type eq "QKEYVAL"} { + set keyval_key [::tomlish::utils::unescape_string $keyval_key] + } dict set datastructure $keyval_key $keyval_dict } NEWLINE - COMMENT - WS { #ignore } default { - error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in ITABLE context. Expected only KEYVAL,QKEYVAL,SQKEYVAL,NEWLINE,COMMENT,WS" } } } @@ -350,12 +369,16 @@ namespace eval tomlish { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::utils::unescape_string $value]] } - TABLE - ARRAY - MULTISTRING { + STRINGLIT { + set value [lindex $element 1] + lappend datastructure [list type $type value $value] + } + ITABLE - TABLE - ARRAY - MULTISTRING { set value [lindex $element 1] lappend datastructure [list type $type value [::tomlish::get_dict [list $element]]] } - WS - SEP { - #ignore whitespace and commas + WS - SEP - NEWLINE - COMMENT { + #ignore whitespace, commas, newlines and comments } default { error "Unexpected value type '$type' found in array" @@ -553,13 +576,13 @@ namespace eval tomlish::encode { proc boolean {b} { #convert any Tcl-acceptable boolean to boolean as accepted by toml - lower case true/false - if {![string is boolean -strict $b]} { + if {![tcl::string::is boolean -strict $b]} { error "Unable to convert '$b' to Toml boolean true|false. [::tomlish::parse::report_line]" } else { if {[expr {$b && 1}]} { - return [list BOOL true] + return [::list BOOL true] } else { - return [list BOOL false] + return [::list BOOL false] } } } @@ -610,12 +633,13 @@ namespace eval tomlish::encode { #Handle invalid tag nestings switch -- $context { QKEYVAL - + SQKEYVAL - KEYVAL { - if {$tag in {KEYVAL QKEYVAL}} { + if {$tag in {KEYVAL QKEYVAL SQKEYVAL}} { error "Invalid tag '$tag' encountered within '$context'" } } - MULTISTRING { + MULTISTRING - MULTILITERAL { #explicitly list the valid child tags if {$tag ni {STRING STRINGPART WS NEWLINE CONT}} { error "Invalid tag '$tag' encountered within a MULTISTRING" @@ -631,9 +655,12 @@ namespace eval tomlish::encode { #optional root tag. Ignore. } QKEYVAL - + SQKEYVAL - KEYVAL { if {$tag eq "KEYVAL"} { append toml [lindex $item 1] ;#Key + } elseif {$tag eq "SQKEYVAL"} { + append toml '[lindex $item 1]' ;#SQuoted Key } else { append toml \"[lindex $item 1]\" ;#Quoted Key } @@ -691,10 +718,10 @@ namespace eval tomlish::encode { STRING { #simple double quoted strings only # - return \"[lindex $item 1]\" + append toml \"[lindex $item 1]\" } STRINGPART { - return [lindex $item 1] + append toml [lindex $item 1] } MULTISTRING { #Tripple quoted string which is a container for newlines,whitespace and multiple strings/stringparts @@ -704,17 +731,17 @@ namespace eval tomlish::encode { } append toml "\"\"\"$multistring\"\"\"" } - LITSTRING { + STRINGLIT { #Single Quoted string(literal string) append toml '[lindex $item 1]' } - MULTILITSTRING { - #review - multilitstring can be handled as a single string? - set litstring "" + MULTISTRINGLIT { + #review - multistringlit can be handled as a single string? + set stringlit "" foreach part [lrange $item 1 end] { - append litstring [::tomlish::encode::tomlish [list $part] $nextcontext] + append stringlit [::tomlish::encode::tomlish [list $part] $nextcontext] } - append toml '''$litstring''' + append toml '''$stringlit''' } INT - BOOL - @@ -853,11 +880,11 @@ namespace eval tomlish::decode { set prevstate $state ##### set nextstate [::tomlish::parse::getNextState $tokenType $prevstate] - ::tomlish::log::info "tok: $tok STATE TRANSITION tokenType: '$tokenType' triggering '$state' -> '$nextstate' last_space_action:$last_space_action" + ::tomlish::log::info "STATE TRANSITION tokenType: '$tokenType' tok: $tok triggering '$state' -> '$nextstate' last_space_action:$last_space_action" set state $nextstate if {$state eq "err"} { - error "State error - aborting parse. [tomlish::parse::report_line]" + error "State error for tokenType: $tokenType tok: $tok - aborting parse. [tomlish::parse::report_line]" } if {$last_space_action eq "pop"} { @@ -885,10 +912,10 @@ namespace eval tomlish::decode { lappend v($nest) "SEP" } endinlinetable { - puts stderr "endinlinetable" + ::tomlish::log::debug "endinlinetable for last_space_action pop" } endmultiquote { - puts stderr "endmultiquote for last_space_action 'pop'" + ::tomlish::log::debug "endmultiquote for last_space_action 'pop'" } default { error "unexpected tokenType '$tokenType' for last_space_action 'pop'" @@ -909,6 +936,9 @@ namespace eval tomlish::decode { quotedkey - itablequotedkey { set v($nest) [list QKEYVAL $tok] ;#$tok is the keyname } + squotedkey - itablesquotedkey { + set v($nest) [list SQKEYVAL $tok] ;#$tok is the keyname + } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish @@ -940,12 +970,8 @@ namespace eval tomlish::decode { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. } startmultiquote { - puts stderr "push trigger tokenType startmultiquote (todo)" + ::tomlish::log::debug "push trigger tokenType startmultiquote" set v($nest) [list MULTISTRING] ;#container for STRINGPART, NEWLINE - #JMN ??? - #set next_tokenType_known 1 - #::tomlish::parse::set_tokenType "multistring" - #set tok "" } default { error "push trigger tokenType '$tokenType' not yet implemented" @@ -969,7 +995,7 @@ namespace eval tomlish::decode { #no output into the tomlish list for this token } startinlinetable { - puts stderr "decode::toml error. did not expect startlinetable without space level change" + puts stderr "decode::toml error. did not expect startinlinetable without space level change" } startquote { switch -exact -- $nextstate { @@ -993,6 +1019,28 @@ namespace eval tomlish::decode { } } } + startsquote { + switch -exact -- $nextstate { + stringlit { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "stringlit" + set tok "" + } + squotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + itablesquotedkey { + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "itablesquotedkey" + set tok "" + } + default { + error "startsquote switch case not implemented for nextstate: $nextstate" + } + } + } startmultiquote { #review puts stderr "no space level change - got startmultiquote" @@ -1004,6 +1052,9 @@ namespace eval tomlish::decode { #nothing to do? set tok "" } + endsquote { + set tok "" + } endmultiquote { #JMN!! set tok "" @@ -1023,6 +1074,9 @@ namespace eval tomlish::decode { } itablequotedkey { + } + stringlit { + lappend v($nest) [list STRINGLIT $tok] } untyped-value { #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. @@ -1074,6 +1128,7 @@ namespace eval tomlish::decode { } if {!$next_tokenType_known} { + ::tomlish::log::notice "tomlish::decode::toml - current tokenType:$tokenType Next token type not known" ::tomlish::parse::set_tokenType "" set tok "" } @@ -1108,8 +1163,6 @@ namespace eval tomlish::decode { } finally { set is_parsing 0 } - - return $v(0) } @@ -1136,22 +1189,76 @@ namespace eval tomlish::utils { set segments [tablename_split $tablename false] set trimmed_segments [list] foreach seg $segments { - lappend trimmed_segments [::string trim $seg [list " " \t]] + lappend trimmed_segments [::string trim $seg " \t"] } return [join $trimmed_segments .] } + #basic generic quote matching for single and double quotes + #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes + proc tok_in_quotedpart {tok} { + set sLen [::string length $tok] + set quote_type "" + set had_slash 0 + for {set i 0} {$i < $sLen} {incr i} { + set c [tcl::string::index $tok $i] + if {$quote_type eq ""} { + if {$had_slash} { + #don't enter quote mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + set quote_type dq + } + sq { + set quote_type sq + } + bsl { + set had_slash 1 + } + } + } + } else { + if {$had_slash} { + #don't leave quoted mode + #leave slash_mode because even if current char is slash - it is escaped + set had_slash 0 + } else { + set ctype [string map [list {"} dq {'} sq \\ bsl] $c] + switch -- $ctype { + dq { + if {$quote_type eq "dq"} { + set quote_type "" + } + } + sq { + if {$quote_type eq "sq"} { + set quote_type "" + } + } + bsl { + set had_slash 1 + } + } + } + } + } + return $quote_type ;#dq | sq + } + #utils::tablename_split proc tablename_split {tablename {normalize false}} { #we can't just split on . because we have to handle quoted segments which may contain a dot. #eg {dog."tater.man"} - set i 0 set sLen [::string length $tablename] set segments [list] set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" - for {} {$i < $sLen} {} { + for {set i 0} {$i < $sLen} {incr i} { if {$i > 0} { set lastChar [::string index $tablename [expr {$i - 1}]] @@ -1160,7 +1267,6 @@ namespace eval tomlish::utils { } set c [::string index $tablename $i] - incr i if {$c eq "."} { switch -exact -- $mode { @@ -1240,7 +1346,7 @@ namespace eval tomlish::utils { } append seg $c } - if {$i == $sLen} { + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" switch -exact -- $mode { @@ -1275,14 +1381,14 @@ namespace eval tomlish::utils { } } foreach seg $segments { - set trimmed [::string trim $seg [list " " \t]] + set trimmed [::string trim $seg " \t"] #note - we explicitly allow 'empty' quoted strings '' & "" # (these are 'discouraged' but valid toml keys) #if {$trimmed in [list "''" "\"\""]} { # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" #} if {$trimmed eq "" } { - error "tablename_split. Empty segment found. tablename: '$tablename'" + error "tablename_split. Empty segment found. tablename: '$tablename' segments [llength $segments] ($segments)" } } return $segments @@ -1640,18 +1746,26 @@ namespace eval tomlish::utils { } proc is_datetime {str} { - #e.g 1979-05-27T00:32:00-07:00 + #e.g 1979-05-27 + #e.g 1979-05-27T00:32:00Z + #e.g 1979-05-27 00:32:00-07:00 + #e.g 1979-05-27 00:32:00+10:00 + #e.g 1979-05-27 00:32:00.999999-07:00 + set matches [regexp -all {[zZtT0-9\-\+\.:]} $str] if {[::string length $str] == $matches} { #all characters in legal range #!todo - use full RFC 3339 parser? lassign [split $str T] datepart timepart #!todo - what if the value is 'time only'? - - if {[catch {clock scan $datepart} err]} { - puts stderr "tcl clock scan failed err:'$err'" - return 0 - } + + #Tcl's free-form clock scan (no -format option) is deprecated + # + #if {[catch {clock scan $datepart} err]} { + # puts stderr "tcl clock scan failed err:'$err'" + # return 0 + #} + #!todo - verify time part is reasonable } else { return 0 @@ -1692,20 +1806,35 @@ namespace eval tomlish::parse { set stateMatrix [dict create] dict set stateMatrix\ - key-space {whitespace "key-space" newline "key-space" bom "key-space" barekey {pushspace "keyval-space"} eof "end" startquote "quotedkey" startmultiquote "err" endquote "err" comment "key-space" comma "err" starttablename "tablename" starttablearrayname "tablearrayname"} - + key-space { + whitespace "key-space"\ + newline "key-space"\ + bom "key-space"\ + barekey {pushspace "keyval-space"}\ + startquote "quotedkey"\ + startsquote "squotedkey"\ + comment "key-space"\ + starttablename "tablename"\ + starttablearrayname "tablearrayname"\ + startmultiquote "err"\ + endquote "err"\ + comma "err"\ + eof "end"\ + } dict set stateMatrix\ curly-space {\ - whitespace "curly-space"\ - newline "curly-space"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + whitespace "curly-space"\ + newline "curly-space"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + itablesquotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + startsquote "itablesquotedkey"\ + comma "curly-space"\ + comment "err"\ + eof "err"\ } #REVIEW @@ -1713,97 +1842,195 @@ namespace eval tomlish::parse { #https://github.com/toml-lang/toml/issues/781 dict set stateMatrix\ curly-syntax {\ - whitespace "curly-syntax"\ - newline "curly-syntax"\ - barekey {pushspace "itablekeyval-space"}\ - itablequotedkey "itablekeyval-space"\ - endinlinetable "popspace"\ - startquote "itablequotedkey"\ - comma "curly-space"\ - eof "err"\ - comment "err"\ + whitespace "curly-syntax"\ + newline "curly-syntax"\ + barekey {pushspace "itablekeyval-space"}\ + itablequotedkey "itablekeyval-space"\ + endinlinetable "popspace"\ + startquote "itablequotedkey"\ + comma "curly-space"\ + comment "curly-space"\ + eof "err"\ } + #review comment "err" vs comment "curly-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES + #We currently allow multiline ITABLES (also with comments) in the tokenizer. + #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? dict set stateMatrix\ value-expected {\ - whitespace "value-expected"\ - newline "err"\ - eof "err"\ - untyped-value "samespace"\ - startquote "string"\ - startmultiquote {pushspace "multistring-space"}\ - startinlinetable {pushspace curly-space}\ - comment "err"\ - comma "err"\ - startarray {pushspace array-space}\ + whitespace "value-expected"\ + untyped-value "samespace"\ + startquote "string"\ + startsquote "stringlit"\ + startmultiquote {pushspace "multistring-space"}\ + startinlinetable {pushspace curly-space}\ + startarray {pushspace array-space}\ + comment "err"\ + comma "err"\ + newline "err"\ + eof "err"\ } dict set stateMatrix\ array-space {\ - whitespace "array-space"\ - newline "array-space"\ - eof "err"\ - untyped-value "samespace"\ - startarray {pushspace "array-space"}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "array-space"\ + whitespace "array-space"\ + newline "array-space"\ + untyped-value "samespace"\ + startarray {pushspace "array-space"}\ + endarray "popspace"\ + startmultiquote {pushspace multistring-space}\ + startinlinetable {pushspace curly-space}\ + startquote "string"\ + startsquote "stringlit"\ + comma "array-space"\ + comment "array-space"\ + eof "err"\ } dict set stateMatrix\ array-syntax {\ - whitespace "array-syntax"\ - newline "array-syntax"\ - untyped-value "samespace"\ - startarray {pushspace array-space}\ - endarray "popspace"\ - startquote "string"\ - startmultiquote "multistring"\ - comma "array-space"\ - comment "err"\ + whitespace "array-syntax"\ + newline "array-syntax"\ + untyped-value "samespace"\ + startarray {pushspace array-space}\ + endarray "popspace"\ + startmultiquote {pushspace multistring-space}\ + startquote "string"\ + startsquote "stringlit"\ + comma "array-space"\ + comment "err"\ } - - dict set stateMatrix\ - itablekeyval-syntax {whitespace "itablekeyval-syntax" endquote "itablekeyval-syntax" newline "err" equal "value-expected" eof "err"} - #dict set stateMatrix\ - # itablekeytail {whitespace "itablekeytail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} dict set stateMatrix\ - itablevaltail {whitespace "itablevaltail" endinlinetable "popspace" comma "popspace" newline "err" comment "err" eof "err"} + itablekeyval-syntax {\ + whitespace "itablekeyval-syntax"\ + endquote "itablekeyval-syntax"\ + endsquote "itablekeyval-syntax"\ + newline "err"\ + equal "value-expected"\ + eof "err"\ + } dict set stateMatrix\ itablekeyval-space {} + + dict set stateMatrix\ + itablevaltail {\ + whitespace "itablevaltail"\ + endinlinetable "popspace"\ + comma "popspace"\ + newline "itablevaltail"\ + comment "itablevaltail"\ + eof "err"\ + } dict set stateMatrix\ - itablequotedkey {whitespace "NA" itablequotedkey {pushspace "itablekeyval-space"} newline "err" endquote "itablekeyval-syntax"} + itablequotedkey {\ + whitespace "NA"\ + itablequotedkey {pushspace "itablekeyval-space"}\ + newline "err"\ + endquote "itablekeyval-syntax"\ + } + dict set stateMatrix\ + itablesquotedkey {\ + whitespace "NA"\ + itablesquotedkey {pushspace "itablekeyval-space"}\ + newline "err"\ + endsquote "itablekeyval-syntax"\ + } dict set stateMatrix\ - keyval-syntax {whitespace "keyval-syntax" endquote "keyval-syntax" comma "err" newline "err" equal "value-expected" eof "err"} + keyval-space {\ + } + dict set stateMatrix\ + keyval-syntax {\ + whitespace "keyval-syntax"\ + endquote "keyval-syntax"\ + endsquote "keyval-syntax"\ + equal "value-expected"\ + comma "err"\ + newline "err"\ + eof "err"\ + } dict set stateMatrix\ keytail {whitespace "keytail" newline "popspace" comment "keytail" eof "end"} - dict set stateMatrix\ - keyval-space {} + #quotedkey & squotedkey need to pushspace from self to keyval-space + dict set stateMatrix\ + quotedkey {\ + whitespace "NA"\ + quotedkey {pushspace "keyval-space"}\ + newline "err"\ + endquote "keyval-syntax"\ + } dict set stateMatrix\ - quotedkey {whitespace "NA" quotedkey {pushspace "keyval-space"} newline "err" endquote "keyval-syntax"} + squotedkey {\ + whitespace "NA"\ + squotedkey {pushspace "keyval-space"}\ + newline "err"\ + endsquote "keyval-syntax"\ + } + dict set stateMatrix\ - string {whitespace "NA" newline "err" string "string" endquote "samespace" eof "err"} + string {\ + whitespace "NA"\ + string "string"\ + endquote "samespace"\ + newline "err"\ + eof "err"\ + } dict set stateMatrix\ - stringpart {eof "err" continuation "samespace" endmultiquote "popspace"} + stringlit {\ + whitespace "NA"\ + stringlit "stringlit"\ + endsquote "samespace"\ + newline "err"\ + eof "err"\ + } + + dict set stateMatrix\ - multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} + stringpart {\ + continuation "samespace"\ + endmultiquote "popspace"\ + eof "err"\ + } + #dict set stateMatrix\ + # multistring {whitespace "NA" newline "NA" multistring "multistring" endmultiquote "samespace" endquote "err" eof "err"} dict set stateMatrix\ - multistring-space {whitespace "multistring-space" continuation "multistring-space" stringpart "multistring-space" multistring "multistring-space" newline "multistring-space" eof "err" endmultiquote "popspace"} + multistring-space {\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + endmultiquote "popspace"\ + eof "err"\ + } + #multistring "multistring-space" + + + dict set stateMatrix\ - tablename {whitespace "NA" tablename {zeropoppushspace key-space} tablename2 {pushspace key-space} newline "err" endtablename "tablenametail"} + tablename {\ + whitespace "NA"\ + tablename {zeropoppushspace key-space}\ + tablename2 {pushspace key-space}\ + endtablename "tablenametail"\ + comma "err"\ + newline "err"\ + } dict set stateMatrix\ - baretablename {whitespace "NA" newline "err" equal "value-expected"} + tablearrayname {\ + whitespace "NA"\ + tablearrayname {zeropoppushspace key-space}\ + tablearrayname2 {pushspace key-space}\ + endtablearray "tablearraynametail"\ + comma "err"\ + newline "err"\ + } + dict set stateMatrix\ tablenametail {whitespace "tablenametail" newline "key-space" comment "tablenametail" eof "end"} - dict set stateMatrix\ - tablearrayname {whitespace "NA" tablearrayname {zeropoppushspace key-space} tablearrayname2 {pushspace key-space} newline "err" endtablearray "tablearraynametail"} dict set stateMatrix\ tablearraynametail {whitespace "tablearraynametail" newline "key-space" comment "tablearraynametail" eof "end"} dict set stateMatrix\ @@ -1854,17 +2081,17 @@ namespace eval tomlish::parse { # e.g "string {array-space array-syntax}" means when transitioning from string to array-space, jump to array-syntax instead. #this is useful as we often don't know state $b. e.g when it is decided by 'popspace' variable spacePopTransitions { - array-space array-syntax - curly-space curly-syntax - keyval-space keytail - itablekeyval-space itablevaltail + array-space array-syntax + curly-space curly-syntax + keyval-space keytail + itablekeyval-space itablevaltail } variable spacePushTransitions { - keyval-space keyval-syntax - itablekeyval-space itablekeyval-syntax - array-space array-space - curly-space curly-space - key-space tablename + keyval-space keyval-syntax + itablekeyval-space itablekeyval-syntax + array-space array-space + curly-space curly-space + key-space tablename } @@ -1879,8 +2106,8 @@ namespace eval tomlish::parse { variable spacePopTransitions variable spacePushTransitions - variable last_space_action "none" - variable last_space_type "none" + variable last_space_action "none" + variable last_space_type "none" variable state_list set result "" @@ -1988,7 +2215,7 @@ namespace eval tomlish::parse { foreach el $list { if { [lindex $el 0] eq "NEWLINE"} { append prettier "[list $el]\n" - } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL TABLE ARRAY})} { + } elseif {([llength $el] > 1) && ([lindex $el 0] in {KEYVAL QKEYVAL SQKEYVAL TABLE ARRAY})} { append prettier [nest_pretty1 $el] } else { append prettier "[list $el] " @@ -2029,6 +2256,7 @@ namespace eval tomlish::parse { incr i -1 return -level 2 1 } elseif {$toklen == 2} { + puts stderr "_shortcircuit_startquotesequence toklen 2" set_tokenType "startquote" set tok "\"" incr i -2 @@ -2036,7 +2264,7 @@ namespace eval tomlish::parse { } } - #return a list of 0 1 or 2 tokens + #returns 0 or 1 #tomlish::parse::tok proc tok {s} { variable nest @@ -2085,22 +2313,30 @@ namespace eval tomlish::parse { } set c [string index $s $i] + tomlish::log::debug "- tokloop char <$c> index $i tokenType:$tokenType tok:<$tok>" #puts "got char $c during tokenType '$tokenType'" incr i ;#must incr here because we do'returns'inside the loop - set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] + set ctest [string map {\{ lc \} rc \[ lb \] rb \" dq ' sq \\ bsl \r cr \n lf \t tab \uFEFF bom} $c] switch -exact -- $ctest { # { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 + + if {$had_slash} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { error "Unexpected character '$c' during bare key. Only \[a-zA-Z_-\] allowed. [tomlish::parse::report_line]" } @@ -2118,43 +2354,80 @@ namespace eval tomlish::parse { incr i -1 return 1 } + starttablename - starttablearrayname { + #fix! + error "Character '#' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare parts - but allowed in quoted parts - let tablename parser sort it out + append tok $c + } default { - #quotedkey, string, multistring + #quotedkey, itablequotedkey, string,stringlit, multistring append tok $c } } } else { - #$slash_active not relevant when no tokenType - #start of token if we're not in a token - set_tokenType comment - set tok "" ;#The hash is not part of the comment data + switch -- $state { + multistring-space - multiliteral-space { + if {$state eq "multistring-space"} { + set_tokenType stringpart + } else { + set_tokenType stringlit ;#review + } + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes#" + } + default { + #start of token if we're not in a token + set_tokenType comment + set tok "" ;#The hash is not part of the comment data + } + } } } lc { - set multi_dquote "" ;#!! - #test jmn2024 #left curly brace + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + #test jmn2024 try { if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - stringpart { - if {$slash_active} {append tok "\\"} + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit - squotedkey - itablesquotedkey { append tok $c } - starttablename { - error "unexpected tablename problem" - #$slash_active not relevant to this tokentype - #change the tokenType - switch_tokenType "starttablearrayname" - set tok "" ;#no output into the tomlish list for this token - #any following whitespace is part of the tablearrayname, so return now - return 1 + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart - stringlit { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - starttablearrayname { + #*bare* tablename can only contain letters,digits underscores + error "Invalid tablename first character \{ [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #valid in quoted parts + append tok $c } comment { - if {$slash_active} {append tok "\\"} + if {$had_slash} {append tok "\\"} append tok "\[" } default { @@ -2171,14 +2444,6 @@ namespace eval tomlish::parse { set tok "\{" return 1 } - multistring-space { - set_tokenType "stringpart" - if {$slash_active} { - set tok "\\\{" - } else { - set tok "\{" - } - } key-space { #invalid - but allow parser statemachine to report it. ? set_tokenType "startinlinetable" @@ -2191,6 +2456,18 @@ namespace eval tomlish::parse { set tok "\{" return 1 } + multistring-space - multiliteral-space { + if {$state eq "multistring-space"} { + set_tokenType stringpart + } else { + set_tokenType stringlit ;#review + } + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\{" + } default { error "state: '$state'. left brace case not implemented [tomlish::parse::report_line]" } @@ -2204,34 +2481,45 @@ namespace eval tomlish::parse { } rc { - set multi_dquote "" ;#!! #right curly brace - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} + stringlit - squotedkey - itablesquotedkey { append tok $c } - tablename { - if {$slash_active} {append tok "\\"} + string - quotedkey - itablequotedkey - comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + starttablename - tablename { + if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch dict set token_waiting type endinlinetable dict set token_waiting tok "" return 1 } - tablearrayname { - if {$slash_active} {append tok "\\"} + starttablearrayname - tablearrayname { + if {$had_slash} {append tok "\\"} #invalid! - but leave for datastructure loading stage to catch dict set token_waiting type endtablearrayname dict set token_waiting tok "" return 1 } itablevaltail { - + #review + error "right-curly in itablevaltail" } default { #end any other token @@ -2257,7 +2545,7 @@ namespace eval tomlish::parse { tablename { #e.g [] - empty tablename - allowed or not? #empty tablename/tablearrayname ? - error "unexpected tablename problem" + #error "unexpected tablename problem" set_tokenType "endinlinetable" set tok "" ;#no output into the tomlish list for this token @@ -2290,42 +2578,80 @@ namespace eval tomlish::parse { itablekeyval-syntax { error "endinlinetable unexpected at this point. Expecting key=val syntax [tomlish::parse::report_line]" } + multistring-space - multiliteral-space { + if {$state eq "multistring-space"} { + set_tokenType "stringpart" + } else { + set_tokenType "stringlit" ;#review + } + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\}" + } default { #JMN2024b keytail? error "state '$state'. endinlinetable case not implemented [tomlish::parse::report_line]" } } } - } on error {em} { - error $em - } finally { - set slash_active 0 - } } lb { - set multi_dquote "" ;#!! #left square bracket - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 + if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - stringpart { - if {$slash_active} {append tok "\\"} + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} append tok $c } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } starttablename { - #$slash_active not relevant to this tokentype #change the tokenType switch_tokenType "starttablearrayname" set tok "" ;#no output into the tomlish list for this token #any following whitespace is part of the tablearrayname, so return now return 1 } + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\[" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + #invalid at this point - state machine should disallow table -> starttablearrayname + dict set token_waiting type starttablearrayname + dict set token_waiting tok "" + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "\[" + } + } + } comment { - if {$slash_active} {append tok "\\"} + if {$had_slash} {append tok "\\"} append tok "\[" } default { @@ -2357,94 +2683,148 @@ namespace eval tomlish::parse { return 1 #error "state: array-space. startarray case not implemented [tomlish::parse::report_line]" } + multistring-space - multiliteral-space { + if {$state eq "multistring-space"} { + set_tokenType "stringpart" + } else { + set_tokenType "stringlit" ;#review + } + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok "$dquotes\[" + } default { error "state: '$state'. startarray case not implemented [tomlish::parse::report_line]" } } } - } on error {em} { - error $em - } finally { - set slash_active 0 - } } rb { - set multi_dquote "" ;#!! #right square bracket - try { + set dquotes $multi_dquote + set multi_dquote "" + set had_slash $slash_active + set slash_active 0 - if {[string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - _shortcircuit_startquotesequence - } - string - stringpart - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - tablename { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablename - dict set token_waiting tok "" - return 1 - } - tablearraynames { - if {$slash_active} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - dict set token_waiting type endtablearrayname - dict set token_waiting tok "" + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + _shortcircuit_startquotesequence + } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit - squotedkey - itablesquotedkey { + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + #???? + incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 - } - default { + } else { incr i -1 + if {$had_slash} {incr i -1} ;#reprocess return 1 } } - } else { - #$slash_active not relevant when no tokenType - switch -exact -- $state { - value-expected { - #invalid - but allow parser statemachine to report it. - set_tokenType "endarray" - set tok "\]" - return 1 - } - key-space { - #invalid - but allow parser statemachine to report it. ? - set_tokenType "endarray" - set tok "\]" - return 1 + tablename { + #e.g a."x[0]".c is valid table name sequence - so we need to track quoting to know if rb is an end token + if {$had_slash} { + #resultant tablename may be invalid - but leave for datastructure loading stage to catch + append tok "\\]" + } else { + if {[tomlish::utils::tok_in_quotedpart $tok] eq ""} { + dict set token_waiting type endtablename + dict set token_waiting tok "" + return 1 + } else { + #we appear to still be in single or double quoted section + append tok "]" + } } - tablename { - #e.g [] - empty tablename - allowed or not? - #empty tablename/tablearrayname ? - error "unexpected tablename problem" + } + tablearraynames { + #todo? + if {$had_slash} {append tok "\\"} + #invalid! - but leave for datastructure loading stage to catch + dict set token_waiting type endtablearrayname + dict set token_waiting tok "" + return 1 + } + default { + incr i -1 + return 1 + } + } + } else { + #$slash_active not relevant when no tokenType + switch -exact -- $state { + value-expected { + #invalid - but allow parser statemachine to report it. + set_tokenType "endarray" + set tok "\]" + return 1 + } + key-space { + #invalid - but allow parser statemachine to report it. ? + set_tokenType "endarray" + set tok "\]" + return 1 + } + tablename { + #e.g [] - empty tablename - allowed or not? + #empty tablename/tablearrayname ? + #error "unexpected tablename problem" - set_tokenType "endtablename" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - tablearrayname { - error "unexpected tablearrayname problem" - set_tokenType "endtablearray" - set tok "" ;#no output into the tomlish list for this token - return 1 - } - array-syntax - array-space { - set_tokenType "endarray" - set tok "\]" - return 1 + set_tokenType "endtablename" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + tablearrayname { + error "unexpected tablearrayname problem" + set_tokenType "endtablearray" + set tok "" ;#no output into the tomlish list for this token + return 1 + } + array-syntax - array-space { + set_tokenType "endarray" + set tok "\]" + return 1 + } + multistring-space - multiliteral-space { + if {$state eq "multistring-space"} { + set_tokenType "stringpart" + } else { + set_tokenType "stringlit" ;#review } - default { - error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" + set tok "" + if {$had_slash} { + append tok "\\" } + append tok "$dquotes\]" + } + default { + error "state '$state'. endarray case not implemented [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } bsl { @@ -2456,7 +2836,26 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - litstring - multilitstring - comment - tablename - tablearrayname { + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + whitespace { + if {$state eq "multistring-space"} { + #end whitespace token + incr i -1 ;#reprocess bsl in next run + return 1 + } else { + error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + } + } + stringlit - squotedkey - itablesquotedkey { + #never need to set slash_active true when in stringlit + append tok "\\" + set slash_active 0 + } + string - quotedkey - itablequotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -2474,13 +2873,15 @@ namespace eval tomlish::parse { set slash_active 1 } } - whitespace { - if {$state eq "multistring-space"} { - #end whitespace token - incr i -1 - return 1 + starttablename - starttablearrayname { + error "backslash is invalid as first character of $tokenType [tomlish::parse::report_line]" + } + tablename - tablearrayname { + if {$slash_active} { + set slash_active 0 + append tok "\\\\" } else { - error "Unexpected backslash during whitespace. [tomlish::parse::report_line]" + set slash_active 1 } } barekey { @@ -2492,181 +2893,314 @@ namespace eval tomlish::parse { } } else { if {$state eq "multistring-space"} { - set slash_active 1 + if {$slash_active} { + set_tokenType "stringpart" + set tok "\\\\" + set slash_active 0 + } else { + if {$dquotes ne ""} { + set_tokenType "stringpart" + set tok $dquotes + } + set slash_active 1 + } } else { error "Unexpected backslash when no token is active. [tomlish::parse::report_line]" } } } - dq { - #double quote - try { - if {[::string length $tokenType]} { - switch -exact -- $tokenType { - startquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { + sq { + #single quote + set had_slash $slash_active + set slash_active 0 + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } + startquotesequence { + _shortcircuit_startquotesequence + } + startsquotesequence { + switch -- [tcl::string::length $tok] { + 1 { append tok $c - } elseif {$toklen == 2} { + } + 2 { + #switch? append tok $c - set_tokenType "startmultiquote" + set_tokenType startmultisquote return 1 - } else { - error "unexpected token length in 'startquotesequence'" + } + default { + error "unexpected token length [tcl::string::length $tok] in 'startsquotesequence'" } } - endquotesequence { - set toklen [::string length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - set_tokenType "endmultiquote" + } + stringlit { + #slash_active always false + #terminate the stringlit + dict set token_waiting type endsquote + dict set token_waiting tok "'" + return 1 + } + squotedkey - itablesquotedkey { + dict set token_waiting type endsquote + dict set token_waiting tok "'" + return 1 + } + starttablename - starttablearrayname { + #!!! + incr i -1 + return 1 + } + tablename - tablearrayname { + append tok $c + } + default { + append tok $c + } + } + } else { + switch -exact -- $state { + value-expected - array-space { + #todo - multilitstring startsquotesequence? + set_tokenType "startsquotesequence" + set tok "'" + } + key-space { + set_tokenType "startsquote" + set tok $c + return 1 + } + curly-space { + set_tokenType "startsquote" + set tok $c + return 1 + } + tablename - tablearrayname { + #first char in tablename/tablearrayname state + set_tokenType $state ;#token name matches state name for tablename/tablearrayname + append tok "'" + } + stringlit { + tomlish::log::debug "sq during stringlit state with no tokentype - empty stringlit?" + set_tokenType stringlit + incr -1 + return 1 + } + multistring-space { + + } + default { + error "unhandled squote during state '$state'. [tomlish::parse::report_line]" + } + } + } + + } + dq { + #double quote + set had_slash $slash_active + set slash_active 0 + + if {[::string length $tokenType]} { + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + append tok $c + } elseif {$toklen == 2} { + append tok $c + #switch vs set? + set_tokenType "startmultiquote" + return 1 + } else { + error "unexpected token length $toklen in 'startquotesequence'" + } + } + startsquotesequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + set_tokenType "startsquote" + incr i -1 return 1 - } else { - error "unexpected token length in 'endquotesequence'" } - } - string { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - #unescaped quote always terminates a string? - dict set token_waiting type endquote - dict set token_waiting tok "\"" + 2 { + set_tokenType "startsquote" + incr i -2 return 1 } + default { + error "unexpected startsquotesequence length $toklen" + } } - stringpart { - #sub element of multistring - if {$slash_active} { - append tok "\\" - append tok $c + } + stringlit { + append tok $c + } + string { + if {$had_slash} { + append tok "\\" $c + } else { + #unescaped quote always terminates a string? + dict set token_waiting type endquote + dict set token_waiting tok "\"" + return 1 + } + } + stringpart { + #sub element of multistring + if {$had_slash} { + append tok "\\" $c + } else { + #incr i -1 + + if {$multi_dquote eq "\"\""} { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 } else { - #incr i -1 - + append multi_dquote "\"" + } + } + } + whitespace { + switch -exact -- $state { + multistring-space { + #REVIEW + if {$had_slash} { + incr i -2 + return 1 + } else { + switch -- [tcl::string::length $multi_dquote] { + 2 { + dict set token_waiting type endmultiquote + dict set token_waiting tok "\"\"\"" + set multi_dquote "" + return 1 + } + 1 { + incr i -2 + return 1 + } + 0 { + incr i -1 + return 1 + } + } + } + } + value-expected { if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote + dict set token_waiting type startmultiquote dict set token_waiting tok "\"\"\"" set multi_dquote "" return 1 } else { - append multi_dquote "\"" - } - } - } - whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } - } - value-expected { - if {$multi_dquote eq "\"\""} { - dict set token_waiting type startmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" - return 1 - } else { - #end whitespace token and reprocess - incr i -1 - return 1 - #append multi_dquote "\"" - } - } - default { - dict set token_waiting type startquote - dict set token_waiting tok "\"" - return 1 + #end whitespace token and reprocess + incr i -1 + return 1 } } - } - comment { - if {$slash_active} {append tok "\\"} - append tok $c - } - quotedkey - itablequotedkey { - if {$slash_active} { - append tok "\\" - append tok $c - } else { - dict set token_waiting type endquote + default { + dict set token_waiting type startquote dict set token_waiting tok "\"" - return 1 + return 1 } } - tablename - tablearrayname { - if {$slash_active} {append tok "\\"} + } + comment { + if {$had_slash} {append tok "\\"} + append tok $c + } + quotedkey - itablequotedkey { + if {$had_slash} { + append tok "\\" append tok $c - } - starttablename - starttablearrayname { - incr i -1 ;## + } else { + dict set token_waiting type endquote + dict set token_waiting tok "\"" return 1 } - default { - error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" - } } - } else { - #$slash_active not relevant when no tokenType - #token is string only if we're expecting a value at this point - switch -exact -- $state { - value-expected - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType startquotesequence ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - multistring-space { - #REVIEW + squotedkey - itablesquotedkey { + append tok $c + } + tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok $c + } + starttablename - starttablearrayname { + incr i -1 ;## + return 1 + } + default { + error "got quote during tokenType '$tokenType' [tomlish::parse::report_line]" + } + } + } else { + #$slash_active not relevant when no tokenType + #token is string only if we're expecting a value at this point + switch -exact -- $state { + value-expected - array-space { + #!? start looking for possible multistartquote + #set_tokenType startquote + #set tok $c + #return 1 + set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote + set tok $c + } + multistring-space { + #TODO - had_slash!!! + #REVIEW + if {$had_slash} { + set_tokenType "stringpart" + set tok "\\\"" + set multi_dquote "" + } else { if {$multi_dquote eq "\"\""} { - dict set token_waiting type endmultiquote - dict set token_waiting tok "\"\"\"" - set multi_dquote "" + tomlish::log::debug "---> endmultiquote" + set_tokenType "endmultiquote" + set tok "\"\"\"" return 1 + #dict set token_waiting type endmultiquote + #dict set token_waiting tok "\"\"\"" + #set multi_dquote "" + #return 1 } else { append multi_dquote "\"" } } - key-space { - set tokenType startquote - set tok $c - return 1 - } - curly-space { - set tokenType startquote - set tok $c - return 1 - } - tablename - tablearrayname { - set_tokenType $state - set tok $c - } - default { - error "Unexpected quote during state '$state' [tomlish::parse::report_line]" - } + } + key-space { + set_tokenType "startquote" + set tok $c + return 1 + } + curly-space { + set_tokenType "startquote" + set tok $c + return 1 + } + tablename - tablearrayname { + set_tokenType $state + set tok $c + } + default { + error "Unexpected quote during state '$state' [tomlish::parse::report_line]" } } - } on error {em} { - error $em - } finally { - set slash_active 0 } } = { set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 if {[::string length $tokenType]} { @@ -2674,23 +3208,47 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey { + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit - squotedkey { + #assertion had_slash 0, multi_dquote "" + append tok $c + } + string - comment - quotedkey - itablequotedkey { #for these tokenTypes an = is just data. + if {$had_slash} {append tok "\\"} append tok $c } stringpart { + if {$had_slash} {append tok "\\"} append tok $dquotes$c } whitespace { - dict set token_waiting type equal - dict set token_waiting tok = - return 1 + if {$state in {multistring-space multiliteral-space}} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + dict set token_waiting type equal + dict set token_waiting tok = + return 1 + } } barekey { dict set token_waiting type equal dict set token_waiting tok = return 1 } + starttablename - starttablearrayname { + error "Character '=' is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #invalid in bare name - but valid in quoted parts - leave for tablename parser to sort out + append tok $c + } default { error "unexpected = character during tokentype $tokenType. case not implemented. [tomlish::parse::report_line]" } @@ -2698,11 +3256,16 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok ${dquotes}= + set_tokenType "stringpart" + set tok "" + if {$had_slash} { + append tok "\\" + } + append tok ${dquotes}= } + default { - set_tokenType equal + set_tokenType "equal" set tok = return 1 } @@ -2710,6 +3273,7 @@ namespace eval tomlish::parse { } } cr { + #REVIEW! set dquotes $multi_dquote set multi_dquote "" ;#!! # \r carriage return @@ -2720,9 +3284,25 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit { + append tok $c + } stringpart { append tok $dquotes$c } + starttablename - starttablearrayname { + error "Character is invalid first character for $tokenType. [tomlish::parse::report_line]" + } + tablename - tablearrayname { + #could in theory be valid in quoted part of name + #review - might be better just to disallow here + append tok $c + } default { #!todo - error out if cr inappropriate for tokenType append tok $c @@ -2731,21 +3311,30 @@ namespace eval tomlish::parse { } else { #lf may be appended if next #review - lone cr as newline? - this is uncommon - but so is lone cr in a string(?) - set_tokenType newline + set_tokenType "newline" set tok cr } } lf { + # \n newline set dquotes $multi_dquote set multi_dquote "" ;#!! - # \n newline + set had_slash $slash_active + set slash_active 0 if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. - set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit { + #review + append tok $c + } newline { #this lf is the trailing part of a crlf append tok lf @@ -2757,11 +3346,22 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - dict set token_waiting type newline - dict set token_waiting tok lf - return 1 + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + dict set token_waiting type continuation + dict set token_waiting tok \\ + incr i -1 + return 1 + } else { + dict set token_waiting type newline + dict set token_waiting tok lf + return 1 + } } } + starttablename - tablename - tablearrayname - starttablearrayname { + error "Character is invalid in $tokenType. [tomlish::parse::report_line]" + } default { #newline ends all other tokens. #note for string: we don't add (raw unescaped) newline to simple string. (must use multi-string for this) @@ -2771,59 +3371,115 @@ namespace eval tomlish::parse { #puts "-------------- newline lf during tokenType $tokenType" dict set token_waiting type newline - dict set token_waiting tok lf + dict set token_waiting tok lf return 1 } } } else { - set had_slash $slash_active - set slash_active 0 - if {$had_slash} { - set_tokenType "continuation" - set tok "\\" - incr i -1 - return 1 - } else { - set_tokenType newline - set tok lf - return 1 + switch -exact -- $state { + multistring-space { + if {$had_slash} { + set_tokenType "continuation" + set tok "\\" + incr i -1 + return 1 + } else { + if {$dquotes ne ""} { + #e.g one or 2 quotes just before nl + set_tokenType "stringpart" + set tok $dquotes + incr i -1 + return 1 + } + set_tokenType "newline" + set tok lf + return 1 + } + } + default { + #ignore slash? error? + set_tokenType "newline" + set tok lf + return 1 + } } + #if {$had_slash} { + # #CONT directly before newline - allows strings_5_byteequivalent test to pass + # set_tokenType "continuation" + # set tok "\\" + # incr i -1 + # return 1 + #} else { + # set_tokenType newline + # set tok lf + # return 1 + #} } } , { set dquotes $multi_dquote - set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set multi_dquote "" + set had_slash $slash_active set slash_active 0 if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - comment - quotedkey - tablename - tablearrayname { + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - tablename - tablearrayname { + if {$had_slash} {append tok "\\"} + append tok , + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} + append tok $c + } + stringlit - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" append tok $c } stringpart { + if {$had_slash} {append tok "\\"} append tok $dquotes$c } + whitespace { + if {$state eq "multistring-space"} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + dict set token_waiting type comma + dict set token_waiting tok "," + return 1 + } + } default { dict set token_waiting type comma dict set token_waiting tok "," + if {$had_slash} {append tok "\\"} return 1 } } } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "," + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes," } multiliteral-space { - set_tokenType literalpart + #assert had_slash 0, multi_dquote "" + set_tokenType "stringlit" set tok "," } default { - set_tokenType comma + set_tokenType "comma" set tok "," return 1 } @@ -2831,24 +3487,61 @@ namespace eval tomlish::parse { } } . { + set dquotes $multi_dquote set multi_dquote "" ;#!! - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + set had_slash $slash_active set slash_active 0 if {[::string length $tokenType]} { switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } - string - stringpart - comment - quotedkey - untyped-value { + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + comment - untyped-value { + if {$had_slash} {append tok "\\"} + append tok $c + } + string - quotedkey - itablequotedkey { + if {$had_slash} {append tok "\\"} append tok $c } - baretablename - tablename - tablearrayname { + stringlit - squotedkey - itablesquotedkey { + #assert had_slash always 0, multi_dquote "" + append tok $c + } + stringpart { + if {$had_slash} {append tok "\\"} + append tok $dquotes$c + } + whitespace { + if {$state eq "multistring-space"} { + set backchars [expr {[tcl::string::length $dquotes] + 1}] + if {$had_slash} { + incr backchars 1 + } + incr i -$backchars + return 1 + } else { + error "Received period during tokenType 'whitespace' [tomlish::parse::report_line]" + } + } + starttablename - starttablearrayname { + #This would correspond to an empty table name + error "Character '.' is not allowed as first character ($tokenType). [tomlish::parse::report_line]" + } + tablename - tablearrayname { #subtable - split later - review append tok $c } barekey { + #e.g x.y = 1 #we need to transition the barekey to become a structured table name ??? review - switch_tokenType tablename + #x is the tablename y is the key + switch_tokenType tablenamepluskey incr i -1 #error "barekey period unimplemented" @@ -2863,15 +3556,19 @@ namespace eval tomlish::parse { } else { switch -exact -- $state { multistring-space { - set_tokenType stringpart - set tok "." + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." } multiliteral-space { - set_tokenType literalpart - set tok "." + set_tokenType "literalpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "$dquotes." } default { - set_tokenType untyped-value + set_tokenType "untyped-value" set tok "." } } @@ -2888,7 +3585,13 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } barekey { + #todo had_slash - emit token or error #whitespace is a terminator for bare keys #dict set token_waiting type whitespace #dict set token_waiting tok $c @@ -2906,22 +3609,34 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } + append tok $dquotes$c + } + string - quotedkey - itablequotedkey { + if {$had_slash} { append tok "\\" } append tok $c } - quotedkey - string { - if {$had_slash} { - append tok "\\" - } - #if {$dquotes eq "\""} { - #} + stringlit - squotedkey - itablesquotedkey { append tok $c } whitespace { - append tok $c + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + #end whitespace token + #go back by the number of quotes plus this space char + set backchars [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backchars + return 1 + } else { + append tok $c + } + } else { + append tok $c + } } stringpart { if {$had_slash} { #REVIEW + #emit the stringpart - go back to the slash incr i -2 return 1 } else { @@ -2932,11 +3647,7 @@ namespace eval tomlish::parse { return 1 } } - starttablename { - incr i -1 - return 1 - } - starttablearrayname { + starttablename - starttablearrayname { incr i -1 return 1 } @@ -2951,9 +3662,7 @@ namespace eval tomlish::parse { } } else { set had_slash $slash_active - if {$slash_active} { - set slash_active 0 - } + set slash_active 0 switch -exact -- $state { tablename - tablearrayname { #tablename can have leading,trailing and interspersed whitespace! @@ -2976,7 +3685,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok $dquotes incr i -1 - return + return 1 } set_tokenType "whitespace" append tok $c @@ -2997,12 +3706,20 @@ namespace eval tomlish::parse { set multi_dquote "" ;#!! if {[::string length $tokenType]} { - if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. + if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 switch -exact -- $tokenType { startquotesequence { _shortcircuit_startquotesequence } + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit { + append tok $c + } barekey { #whitespace is a terminator for bare keys incr i -1 @@ -3017,7 +3734,7 @@ namespace eval tomlish::parse { incr i -1 return 1 } - quotedkey { + quotedkey - itablequotedkey { append tok $c } string - comment - whitespace { @@ -3078,9 +3795,27 @@ namespace eval tomlish::parse { } bom { #BOM (Byte Order Mark) - ignored by token consumer - set_tokenType "bom" - set tok "\uFEFF" - return 1 + if {[string length $tokenType]} { + switch -exact -- $tokenType { + startsquotesequence { + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 + } + stringlit { + append tok $c + } + default { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } + } + } else { + set_tokenType "bom" + set tok "\uFEFF" + return 1 + } } default { set dquotes $multi_dquote @@ -3093,12 +3828,27 @@ namespace eval tomlish::parse { startquotesequence { _shortcircuit_startquotesequence } - endquotesequence { - puts stderr "endquotesequence: $tok" + startsquotesequence { + puts stdout "HERE $c" + incr i -[tcl::string::length $tok] + set_tokenType "startsquote" + return 1 } whitespace { - incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. - return 1 + if {$state eq "multistring-space"} { + if {$dquotes ne ""} { + set backlen [expr {[tcl::string::length $dquotes] + 1}] + incr i -$backlen + return 1 + } else { + incr i -1 + return 1 + } + } else { + #review + incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. + return 1 + } } barekey { if {[tomlish::utils::is_barekey $c]} { @@ -3116,7 +3866,7 @@ namespace eval tomlish::parse { append tok $dquotes$c } default { - #e.g comment/string/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname + #e.g comment/string/stringlit/untyped-value/starttablename/starttablearrayname/tablename/tablearrayname append tok $c } } @@ -3151,6 +3901,7 @@ namespace eval tomlish::parse { set tok $c } default { + tomlish::log::debug "char '$c' setting to untyped-value while state:$state" set_tokenType "untyped-value" set tok $c } @@ -3167,20 +3918,39 @@ namespace eval tomlish::parse { #if {$state eq "err"} { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} - if {$tokenType eq "startquotesequence"} { - set toklen [::string length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - eror "eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #dict set token_waiting type "string" - #dict set token_waiting tok "" - return 1 + switch -exact -- $tokenType { + startquotesequence { + set toklen [::string length $tok] + if {$toklen == 1} { + #invalid + #eof with open string + error "eof reached without closing quote for string. [tomlish::parse::report_line]" + } elseif {$toklen == 2} { + #valid + #we ended in a double quote, not actually a startquoteseqence - effectively an empty string + switch_tokenType "startquote" + incr i -1 + #dict set token_waiting type "string" + #dict set token_waiting tok "" + return 1 + } + } + startsquotesequence { + set toklen [::string length $tok] + switch -- $toklen { + 1 { + #invalid eof with open stringlit + error "eof reached without closing single quote for string literal. [tomlish::parse::report_line]" + } + 2 { + dict set token_waiting type endsquote + dict set token_waiting tok "'" + ### + set_tokenType "stringlit" + set tok "" + return 1 + } + } } } dict set token_waiting type "eof"