JFIFXX    $.' ",#(7),01444'9=82<.342  2!!22222222222222222222222222222222222222222222222222"4 ,PG"Z_4˷kjزZ,F+_z,© zh6٨icfu#ډb_N?wQ5-~I8TK<5oIv-k_U_~bMdӜUHh?]EwQk{_}qFW7HTՑYF?_'ϔ_Ջt=||I 6έ"D/[k9Y8ds|\Ҿp6Ҵ].6znopM[mei$[soᘨ˸ nɜG-ĨUycP3.DBli;hjx7Z^NhN3u{:jx힞#M&jL P@_ P&o89@Sz6t7#Oߋ s}YfTlmrZ)'Nk۞pw\Tȯ?8`Oi{wﭹW[r Q4F׊3m&L=h3z~#\l :F,j@ ʱwQT8"kJO6֚l}R>ډK]y&p}b;N1mr$|7>e@BTM*-iHgD) Em|ؘbҗaҾt4oG*oCNrPQ@z,|?W[0:n,jWiEW$~/hp\?{(0+Y8rΟ+>S-SVN;}s?. w9˟<Mq4Wv'{)01mBVW[8/< %wT^5b)iM pgN&ݝVO~qu9 !J27$O-! :%H ـyΠM=t{!S oK8txA& j0 vF Y|y ~6@c1vOpIg4lODL Rcj_uX63?nkWyf;^*B @~a`Eu+6L.ü>}y}_O6͐:YrGXkGl^w~㒶syIu! W XN7BVO!X2wvGRfT#t/?%8^WaTGcLMI(J1~8?aT ]ASE(*E} 2#I/׍qz^t̔bYz4xt){ OH+(EA&NXTo"XC')}Jzp ~5}^+6wcQ|LpdH}(.|kc4^"Z?ȕ a<L!039C EuCFEwç ;n?*oB8bʝ'#RqfM}7]s2tcS{\icTx;\7KPʇ Z O-~c>"?PEO8@8GQgaՎ󁶠䧘_%#r>1zaebqcPѵn#L =׀t L7`VA{C:ge@w1 Xp3c3ġpM"'-@n4fGB3DJ8[JoߐgK)ƛ$ 83+ 6ʻ SkI*KZlT _`?KQKdB`s}>`*>,*@JdoF*弝O}ks]yߘc1GV<=776qPTtXԀ!9*44Tހ3XΛex46YD  BdemDa\_l,G/֌7Y](xTt^%GE4}bTڹ;Y)BQu>J/J ⮶.XԄjݳ+Ed r5_D1 o Bx΢#<W8R6@gM. drD>(otU@x=~v2 ӣdoBd3eO6㣷ݜ66YQz`S{\P~z m5{J/L1xO\ZFu>ck#&:`$ai>2ΔloF[hlEܺΠk:)` $[69kOw\|8}ބ:񶐕IA1/=2[,!.}gN#ub ~݊}34qdELc$"[qU硬g^%B zrpJru%v\h1Yne`ǥ:gpQM~^Xi `S:V29.PV?Bk AEvw%_9CQwKekPؠ\;Io d{ ߞoc1eP\ `E=@KIRYK2NPlLɀ)&eB+ь( JTx_?EZ }@ 6U뙢طzdWIn` D噥[uV"G&Ú2g}&m?ċ"Om# {ON"SXNeysQ@FnVgdX~nj]J58up~.`r\O,ư0oS _Ml4kv\JSdxSW<AeIX$Iw:Sy›R9Q[,5;@]%u@ *rolbI  +%m:͇ZVủθau,RW33 dJeTYE.Mϧ-oj3+yy^cVO9NV\nd1 !͕_)av;թMlWR1)ElP;yوÏu 3k5Pr6<⒲l!˞*u־n!l:UNW %Chx8vL'X@*)̮ˍ D-M+JUkvK+x8cY?Ԡ~3mo|u@[XeYC\Kpx8oCC&N~3-H MXsu<`~"WL$8ξ3a)|:@m\^`@ҷ)5p+6p%i)P Mngc#0AruzRL+xSS?ʮ}()#tmˇ!0}}y$6Lt;$ʳ{^6{v6ķܰgVcnn ~zx«,2u?cE+ȘH؎%Za)X>uWTzNyosFQƤ$*&LLXL)1" LeOɟ9=:tZcŽY?ӭVwv~,Yrۗ|yGaFC.+ v1fήJ]STBn5sW}y$~z'c 8  ,! 
pVNSNNqy8z˱A4*'2n<s^ǧ˭PJޮɏUGLJ*#i}K%,)[z21z ?Nin1?TIR#m-1lA`fT5+ܐcq՝ʐ,3f2Uեmab#ŠdQy>\)SLYw#.ʑf ,"+w~N'cO3FN<)j&,- љ֊_zSTǦw>?nU仆Ve0$CdrP m׈eXmVu L.bֹ [Դaզ*\y8Է:Ez\0KqC b̘cөQ=0YsNS.3.Oo:#v7[#߫ 5܎LEr49nCOWlG^0k%;YߝZǓ:S#|}y,/kLd TA(AI$+I3;Y*Z}|ӧOdv..#:nf>>ȶITX 8y"dR|)0=n46ⲑ+ra ~]R̲c?6(q;5% |uj~z8R=XIV=|{vGj\gcqz؋%Mߍ1y#@f^^>N#x#۹6Y~?dfPO{P4Vu1E1J *|%JN`eWuzk M6q t[ gGvWIGu_ft5j"Y:Tɐ*; e54q$C2d} _SL#mYpO.C;cHi#֩%+) ӍƲVSYźg |tj38r|V1#;.SQA[S#`n+$$I P\[@s(EDzP])8G#0B[ىXIIq<9~[Z멜Z⊔IWU&A>P~#dp]9 "cP Md?٥Ifتuk/F9c*9Ǎ:ØFzn*@|Iށ9N3{'['ͬҲ4#}!V Fu,,mTIkv C7vB6kT91*l '~ƞFlU'M ][ΩũJ_{iIn$L jOdxkza۪#EClx˘oVɞljr)/,߬hL#^Lф,íMƁe̩NBLiLq}(q6IçJ$WE$:=#(KBzђ xlx?>Պ+>W,Ly!_DŌlQ![ SJ1ƐY}b,+Loxɓ)=yoh@꥟/Iѭ=Py9 ۍYӘe+pJnϱ?V\SO%(t =?MR[Șd/ nlB7j !;ӥ/[-A>dNsLj ,ɪv=1c.SQO3UƀܽE̻9GϷD7(}Ävӌ\y_0[w <΍>a_[0+LF.޺f>oNTq;y\bՃyjH<|q-eɏ_?_9+PHp$[uxK wMwNی'$Y2=qKBP~Yul:[<F12O5=d]Ysw:ϮEj,_QXz`H1,#II dwrP˂@ZJVy$\y{}^~[:NߌUOdؾe${p>G3cĖlʌ ת[`ϱ-WdgIig2 }s ؤ(%#sS@~3XnRG~\jc3vӍLM[JBTs3}jNʖW;7ç?=XF=-=qߚ#='c7ڑWI(O+=:uxqe2zi+kuGR0&eniT^J~\jyp'dtGsO39* b#Ɋ p[BwsT>d4ۧsnvnU_~,vƜJ1s QIz)(lv8MU=;56Gs#KMP=LvyGd}VwWBF'à ?MHUg2 !p7Qjڴ=ju JnA suMeƆҔ!)'8Ϣٔޝ(Vpצ֖d=ICJǠ{qkԭ߸i@Ku|p=..*+xz[Aqġ#s2aƊRR)*HRsi~a &fMP-KL@ZXy'x{}Zm+:)) IJ-iu ܒH'L(7yGӜq j 6ߌg1go,kرtY?W,pefOQS!K۟cҒA|սj>=⬒˧L[ ߿2JaB~Ru:Q] 0H~]7ƼI(}cq 'ήETq?fabӥvr )o-Q_'ᴎoK;Vo%~OK *bf:-ťIR`B5!RB@ï u ̯e\_U_ gES3QTaxU<~c?*#]MW,[8Oax]1bC|踤Plw5V%){t<d50iXSUm:Z┵i"1^B-PhJ&)O*DcWvM)}Pܗ-q\mmζZ-l@}aE6F@&Sg@ݚM ȹ 4#p\HdYDoH"\..RBHz_/5˘6KhJRPmƶim3,#ccoqa)*PtRmk7xDE\Y閣_X<~)c[[BP6YqS0%_;Àv~| VS؇ 'O0F0\U-d@7SJ*z3nyPOm~P3|Yʉr#CSN@ ƮRN)r"C:: #qbY. 
6[2K2uǦHYRQMV G$Q+.>nNHq^ qmMVD+-#*U̒ p욳u:IBmPV@Or[b= 1UE_NmyKbNOU}the`|6֮P>\2PVIDiPO;9rmAHGWS]J*_G+kP2KaZH'KxWMZ%OYDRc+o?qGhmdSoh\D|:WUAQc yTq~^H/#pCZTI1ӏT4"ČZ}`w#*,ʹ 0i課Om*da^gJ݅{le9uF#Tֲ̲ٞC"qߍ ոޑo#XZTp@ o8(jdxw],f`~|,s^f1t|m򸄭/ctr5s79Q4H1꠲BB@l9@C+wpxu£Yc9?`@#omHs2)=2.ljg9$YS%*LRY7Z,*=䷘$armoϰUW.|rufIGwtZwo~5 YյhO+=8fF)W7L9lM̘·Y֘YLf큹pRF99.A "wz=E\Z'a 2Ǚ#;'}G*l^"q+2FQ hjkŦ${ޮ-T٭cf|3#~RJt$b(R(rdx >U b&9,>%E\ Άe$'q't*אެb-|dSBOO$R+H)܎K1m`;J2Y~9Og8=vqD`K[F)k[1m޼cn]skz$@)!I x՝"v9=ZA=`Ɠi :E)`7vI}dYI_ o:obo 3Q&D&2= Ά;>hy.*ⅥSӬ+q&j|UƧ}J0WW< ۋS)jQRjƯrN)Gű4Ѷ(S)Ǣ8iW52No˓ ۍ%5brOnL;n\G=^UdI8$&h'+(cȁ߫klS^cƗjԌEꭔgFȒ@}O*;evWVYJ\]X'5ղkFb 6Ro՜mi Ni>J?lPmU}>_Z&KKqrIDՉ~q3fL:Se>E-G{L6pe,8QIhaXaUA'ʂs+טIjP-y8ۈZ?J$WP Rs]|l(ԓsƊio(S0Y 8T97.WiLc~dxcE|2!XKƘਫ਼$((6~|d9u+qd^389Y6L.I?iIq9)O/뚅OXXVZF[یgQLK1RҖr@v#XlFНyS87kF!AsM^rkpjPDyS$Nqnxҍ!Uf!ehi2m`YI9r6 TFC}/y^Η5d'9A-J>{_l+`A['յϛ#w:݅%X}&PStQ"-\縵/$ƗhXb*yBS;Wջ_mcvt?2}1;qSdd~u:2k52R~z+|HE!)Ǟl7`0<,2*Hl-x^'_TVgZA'j ^2ΪN7t?w x1fIzC-ȖK^q;-WDvT78Z hK(P:Q- 8nZ܃e貾<1YT<,"6{/ ?͟|1:#gW>$dJdB=jf[%rE^il:BxSּ1հ,=*7 fcG#q eh?27,!7x6nLC4x},GeǝtC.vS F43zz\;QYC,6~;RYS/6|25vTimlv& nRh^ejRLGf? ۉҬܦƩ|Ȱ>3!viʯ>vオX3e_1zKȗ\qHS,EW[㺨uch⍸O}a>q6n6N6qN ! 1AQaq0@"2BRb#Pr3C`Scst$4D%Td ?Na3mCwxAmqmm$4n淿t'C"wzU=D\R+wp+YT&պ@ƃ3ޯ?AﶂaŘ@-Q=9Dռѻ@MVP܅G5fY6# ?0UQ,IX(6ڵ[DIMNލc&υj\XR|,4 jThAe^db#$]wOӪ1y%LYm뭛CUƃߜ}Cy1XνmF8jI]HۺиE@Ii;r8ӭVFՇ| &?3|xBMuSGe=Ӕ#BE5GY!z_eqр/W>|-Ci߇t1ޯќdR3ug=0 5[?#͏qcfH{ ?u=??ǯ}ZzhmΔBFTWPxs}G93 )gGR<>r h$'nchPBjJҧH -N1N?~}-q!=_2hcMlvY%UE@|vM2.Y[|y"EïKZF,ɯ?,q?vM 80jx";9vk+ ֧ ȺU?%vcVmA6Qg^MA}3nl QRNl8kkn'(M7m9وq%ޟ*h$Zk"$9: ?U8Sl,,|ɒxH(ѷGn/Q4PG%Ա8N! &7;eKM749R/%lc>x;>C:th?aKXbheᜋ^$Iհ hr7%F$EFdt5+(M6tÜUU|zW=aTsTgdqPQb'm1{|YXNb P~F^F:k6"j! 
Ir`1&-$Bevk:y#ywI0x=D4tUPZHڠ底taP6b>xaQ# WeFŮNjpJ* mQN*I-*ȩFg3 5Vʊɮa5FO@{NX?H]31Ri_uѕ 0 F~:60p͈SqX#a5>`o&+<2D: ڝ$nP*)N|yEjF5ټeihyZ >kbHavh-#!Po=@k̆IEN@}Ll?jO߭ʞQ|A07xwt!xfI2?Z<ץTcUj]陎Ltl }5ϓ$,Omˊ;@OjEj(ا,LXLOЦ90O .anA7j4 W_ٓzWjcBy՗+EM)dNg6y1_xp$Lv:9"zpʙ$^JԼ*ϭo=xLj6Ju82AH3$ٕ@=Vv]'qEz;I˼)=ɯx /W(Vp$ mu񶤑OqˎTr㠚xsrGCbypG1ߠw e8$⿄/M{*}W]˷.CK\ުx/$WPwr |i&}{X >$-l?-zglΆ(FhvS*b߲ڡn,|)mrH[a3ר[13o_U3TC$(=)0kgP u^=4 WYCҸ:vQרXàtkm,t*^,}D* "(I9R>``[~Q]#afi6l86:,ssN6j"A4IuQ6E,GnHzSHOuk5$I4ؤQ9@CwpBGv[]uOv0I4\yQѸ~>Z8Taqޣ;za/SI:ܫ_|>=Z8:SUIJ"IY8%b8H:QO6;7ISJҌAά3>cE+&jf$eC+z;V rʺmyeaQf&6ND.:NTvm<- uǝ\MvZYNNT-A>jr!SnO 13Ns%3D@`ܟ 1^c< aɽ̲Xë#w|ycW=9I*H8p^(4՗karOcWtO\ƍR8'KIQ?5>[}yUײ -h=% qThG2)"ו3]!kB*pFDlA,eEiHfPs5H:Փ~H0DتDIhF3c2E9H5zԑʚiX=:mxghd(v׊9iSOd@0ڽ:p5h-t&Xqӕ,ie|7A2O%PEhtjY1wЃ!  ࢽMy7\a@ţJ 4ȻF@o̒?4wx)]P~u57X 9^ܩU;Iꭆ 5 eK27({|Y׎ V\"Z1 Z}(Ǝ"1S_vE30>p; ΝD%xW?W?vo^Vidr[/&>~`9Why;R ;;ɮT?r$g1KACcKl:'3 cﳯ*"t8~l)m+U,z`(>yJ?h>]vЍG*{`;y]IT ;cNUfo¾h/$|NS1S"HVT4uhǜ]v;5͠x'C\SBplh}N ABx%ޭl/Twʽ]D=Kžr㻠l4SO?=k M: cCa#ha)ѐxcsgPiG{+xQI= zԫ+ 8"kñj=|c yCF/*9жh{ ?4o kmQNx;Y4膚aw?6>e]Qr:g,i"ԩA*M7qB?ӕFhV25r[7 Y }LR}*sg+xr2U=*'WSZDW]WǞ<叓{$9Ou4y90-1'*D`c^o?(9uݐ'PI& fJݮ:wSjfP1F:X H9dԯ˝[_54 }*;@ܨ ðynT?ןd#4rGͨH1|-#MrS3G3).᧏3vz֑r$G"`j 1tx0<ƆWh6y6,œGagAyb)hDß_mü gG;evݝnQ C-*oyaMI><]obD":GA-\%LT8c)+y76oQ#*{(F⽕y=rW\p۩cA^e6KʐcVf5$'->ՉN"F"UQ@fGb~#&M=8טJNu9D[̤so~ G9TtW^g5y$bY'سǴ=U-2 #MCt(i lj@Q 5̣i*OsxKf}\M{EV{υƇ);HIfeLȣr2>WIȂ6ik 5YOxȺ>Yf5'|H+98pjn.OyjY~iw'l;s2Y:'lgꥴ)o#'SaaKZ m}`169n"xI *+ }FP"l45'ZgE8?[X7(.Q-*ތL@̲v.5[=t\+CNܛ,gSQnH}*FG16&:t4ُ"Ạ$b |#rsaT ]ӽDP7ո0y)e$ٕvIh'QEAm*HRI=: 4牢) %_iNݧl] NtGHL ɱg<1V,J~ٹ"KQ 9HS9?@kr;we݁]I!{ @G["`J:n]{cAEVʆ#U96j#Ym\qe4hB7Cdv\MNgmAyQL4uLjj9#44tl^}LnR!t±]rh6ٍ>yҏNfU  Fm@8}/ujb9he:AyծwGpΧh5l}3p468)Udc;Us/֔YX1O2uqs`hwgr~{ RmhN؎*q 42*th>#E#HvOq}6e\,Wk#Xb>p}դ3T5†6[@Py*n|'f֧>lư΂̺SU'*qp_SM 'c6m ySʨ;MrƋmKxo,GmPAG:iw9}M(^V$ǒѽ9| aJSQarB;}ٻ֢2%Uc#gNaݕ'v[OY'3L3;,p]@S{lsX'cjwk'a.}}& dP*bK=ɍ!;3ngΊUߴmt'*{,=SzfD Ako~Gaoq_mi}#mPXhύmxǍ΂巿zfQc|kc?WY$_Lvl߶c`?ljݲˏ!V6UЂ(A4y)HpZ_x>eR$/`^'3qˏ-&Q=?CFVR 
DfV9{8gnh(P"6[D< E~0<@`G6Hгcc cK.5DdB`?XQ2ٿyqo&+1^ DW0ꊩG#QnL3c/x 11[yxპCWCcUĨ80me4.{muI=f0QRls9f9~fǨa"@8ȁQ#cicG$Gr/$W(WV"m7[mAmboD j۳ l^kh׽ # iXnveTka^Y4BNĕ0 !01@Q"2AaPq3BR?@4QT3,㺠W[=JKϞ2r^7vc:9 EߴwS#dIxu:Hp9E! V 2;73|F9Y*ʬFDu&y؟^EAA(ɩ^GV:ݜDy`Jr29ܾ㝉[E;FzxYGUeYC v-txIsםĘqEb+P\ :>iC';k|zرny]#ǿbQw(r|ӹs[D2v-%@;8<a[\o[ϧwI!*0krs)[J9^ʜp1) "/_>o<1AEy^C`x1'ܣnps`lfQ):lb>MejH^?kl3(z:1ŠK&?Q~{ٺhy/[V|6}KbXmn[-75q94dmc^h X5G-}دBޟ |rtMV+]c?-#ڛ^ǂ}LkrOu>-Dry D?:ޞUǜ7V?瓮"#rչģVR;n/_ ؉vݶe5db9/O009G5nWJpA*r9>1.[tsFnQ V 77R]ɫ8_0<՜IFu(v4Fk3E)N:yڮeP`1}$WSJSQNjٺ޵#lј(5=5lǏmoWv-1v,Wmn߀$x_DȬ0¤#QR[Vkzmw"9ZG7'[=Qj8R?zf\a=OU*oBA|G254 p.w7  &ξxGHp B%$gtЏ򤵍zHNuЯ-'40;_3 !01"@AQa2Pq#3BR?ʩcaen^8F<7;EA{EÖ1U/#d1an.1ě0ʾRh|RAo3m3 % 28Q yφHTo7lW>#i`qca m,B-j݋'mR1Ήt>Vps0IbIC.1Rea]H64B>o]($Bma!=?B KǾ+Ծ"nK*+[T#{EJSQs5:U\wĐf3܆&)IԆwE TlrTf6Q|Rh:[K zc֧GC%\_a84HcObiؖV7H )*ģK~Xhչ04?0 E<}3#u? |gS6ꊤ|I#Hڛ աwX97Ŀ%SLy6č|Fa 8b$sקhb9RAu7˨pČ_\*w묦F 4D~f|("mNKiS>$d7SlA/²SL|6N}S˯g]6; #. 403WebShell
403Webshell
Server IP : 84.32.84.209  /  Your IP : 216.73.216.85
Web Server : LiteSpeed
System : Linux sg-nme-web1277.main-hosting.eu 4.18.0-553.62.1.lve.el8.x86_64 #1 SMP Mon Jul 21 17:50:35 UTC 2025 x86_64
User : u728310944 ( 728310944)
PHP Version : 8.1.33
Disable Function : system, exec, shell_exec, passthru, mysql_list_dbs, ini_alter, dl, symlink, link, chgrp, leak, popen, apache_child_terminate, virtual, mb_send_mail
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : OFF  |  Python : OFF  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /opt/gsutil/third_party/idna/tools/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /opt/gsutil/third_party/idna/tools/idna-data
#!/usr/bin/env python3

import argparse, collections, datetime, os, re, sys, unicodedata
from urllib.request import urlopen

# Use intranges.intranges_from_list() from the sibling idna directory
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "idna"
    )
)
from intranges import intranges_from_list

# Refuse to run on Python 2: print a notice and exit with status 2.
# NOTE(review): this guard sits below the module's imports (including
# ``urllib.request``), so on Python 2 an ImportError would presumably be
# raised before this check is reached -- confirm, and consider moving the
# guard above the imports.
if sys.version_info[0] < 3:
    print("Only Python 3 supported.")
    sys.exit(2)

# Unicode version used when ``--version`` is left at its 'preferred' default.
PREFERRED_VERSION = '16.0.0'
# URL templates for downloading data files; ``{version}`` and ``{filename}``
# are filled in by UnicodeData._ucdfile().
UCD_URL = 'http://www.unicode.org/Public/{version}/ucd/{filename}'
UTS46_URL = 'http://www.unicode.org/Public/idna/{version}/{filename}'

# Cache location used when neither --cache nor --no-cache is given; the
# leading '~' is expanded with os.path.expanduser() when the path is built.
DEFAULT_CACHE_DIR = '~/.cache/unidata'

# Scripts affected by IDNA contextual rules
SCRIPT_WHITELIST = sorted(['Greek', 'Han', 'Hebrew', 'Hiragana', 'Katakana'])

# Used to piece apart UTS#46 data for Jython compatibility
# (number of rows emitted per generated ``_seg_N()`` function).
UTS46_SEGMENT_SIZE = 100

# Maps a status name from IdnaMappingTable.txt to a pair of
# (one-character status code written to the output, whether the row's second
# field is read as a mapping).
UTS46_STATUSES = {
    'valid': ('V', False),
    'ignored': ('I', False),
    'mapped': ('M', True),
    'deviation': ('D', True),
    'disallowed': ('X', False),
    'disallowed_STD3_valid': ('3', False),
    'disallowed_STD3_mapped': ('3', True)
}

# Exceptions are manually assigned in Section 2.6 of RFC 5892.
# Keys are code points, values are the IDNA 2008 status to report for them;
# CodePoint.idna2008_status consults this table before any derived rule.
exceptions = {
    0x00DF: 'PVALID',      # LATIN SMALL LETTER SHARP S
    0x03C2: 'PVALID',      # GREEK SMALL LETTER FINAL SIGMA
    0x06FD: 'PVALID',      # ARABIC SIGN SINDHI AMPERSAND
    0x06FE: 'PVALID',      # ARABIC SIGN SINDHI POSTPOSITION MEN
    0x0F0B: 'PVALID',      # TIBETAN MARK INTERSYLLABIC TSHEG
    0x3007: 'PVALID',      # IDEOGRAPHIC NUMBER ZERO
    0x00B7: 'CONTEXTO',    # MIDDLE DOT
    0x0375: 'CONTEXTO',    # GREEK LOWER NUMERAL SIGN (KERAIA)
    0x05F3: 'CONTEXTO',    # HEBREW PUNCTUATION GERESH
    0x05F4: 'CONTEXTO',    # HEBREW PUNCTUATION GERSHAYIM
    0x30FB: 'CONTEXTO',    # KATAKANA MIDDLE DOT
    0x0660: 'CONTEXTO',    # ARABIC-INDIC DIGIT ZERO
    0x0661: 'CONTEXTO',    # ARABIC-INDIC DIGIT ONE
    0x0662: 'CONTEXTO',    # ARABIC-INDIC DIGIT TWO
    0x0663: 'CONTEXTO',    # ARABIC-INDIC DIGIT THREE
    0x0664: 'CONTEXTO',    # ARABIC-INDIC DIGIT FOUR
    0x0665: 'CONTEXTO',    # ARABIC-INDIC DIGIT FIVE
    0x0666: 'CONTEXTO',    # ARABIC-INDIC DIGIT SIX
    0x0667: 'CONTEXTO',    # ARABIC-INDIC DIGIT SEVEN
    0x0668: 'CONTEXTO',    # ARABIC-INDIC DIGIT EIGHT
    0x0669: 'CONTEXTO',    # ARABIC-INDIC DIGIT NINE
    0x06F0: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT ZERO
    0x06F1: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT ONE
    0x06F2: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT TWO
    0x06F3: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT THREE
    0x06F4: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT FOUR
    0x06F5: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT FIVE
    0x06F6: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT SIX
    0x06F7: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT SEVEN
    0x06F8: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT EIGHT
    0x06F9: 'CONTEXTO',    # EXTENDED ARABIC-INDIC DIGIT NINE
    0x0640: 'DISALLOWED',  # ARABIC TATWEEL
    0x07FA: 'DISALLOWED',  # NKO LAJANYALAN
    0x302E: 'DISALLOWED',  # HANGUL SINGLE DOT TONE MARK
    0x302F: 'DISALLOWED',  # HANGUL DOUBLE DOT TONE MARK
    0x3031: 'DISALLOWED',  # VERTICAL KANA REPEAT MARK
    0x3032: 'DISALLOWED',  # VERTICAL KANA REPEAT WITH VOICED SOUND MARK
    0x3033: 'DISALLOWED',  # VERTICAL KANA REPEAT MARK UPPER HALF
    0x3034: 'DISALLOWED',  # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
    0x3035: 'DISALLOWED',  # VERTICAL KANA REPEAT MARK LOWER HALF
    0x303B: 'DISALLOWED',  # VERTICAL IDEOGRAPHIC ITERATION MARK
}
# Per-code-point status overrides consulted immediately after `exceptions`
# by CodePoint.idna2008_status; currently empty.
backwardscompatible = {}


def hexrange(start, end):
    """Return the inclusive range spanned by two hexadecimal strings.

    Both ``start`` and ``end`` are parsed as base-16 integers; the value of
    ``end`` itself is part of the returned ``range``.
    """
    first = int(start, 16)
    last = int(end, 16)
    return range(first, last + 1)

def hexvalue(value):
    """Parse a hexadecimal string such as ``'00DF'`` into an integer."""
    return int(value, base=16)


class UnicodeVersion(object):
    """A Unicode version: either a concrete ``x.y.z`` triple or ``latest``.

    Concrete versions order by their ``(major, minor, patch)`` triple.
    ``latest`` sorts above every concrete version and never compares equal
    to anything, including another ``latest``.

    Raises:
        ValueError: if the string is neither ``x.y.z`` nor ``'latest'``.
    """

    def __init__(self, version):
        result = re.match(r'^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)$', version)
        if result:
            self.major = int(result.group('major'))
            self.minor = int(result.group('minor'))
            self.patch = int(result.group('patch'))
            # Packed form kept for any code that reads the attribute. It is
            # NOT used for comparisons: it collides as soon as minor or patch
            # reaches 16 (e.g. 1.16.0 and 2.0.0 both pack to 512).
            self.numerical = (self.major << 8) + (self.minor << 4) + self.patch
            self.latest = False
        elif version == 'latest':
            self.latest = True
        else:
            raise ValueError('Unrecognized Unicode version')

    def __repr__(self, with_date=True):
        if self.latest:
            if with_date:
                # "latest" is a moving target, so stamp it with today's date.
                return 'latest@{}'.format(datetime.datetime.now().strftime('%Y-%m-%d'))
            else:
                return 'latest'
        else:
            return '{}.{}.{}'.format(self.major, self.minor, self.patch)

    @property
    def tag(self):
        """The version string without any date stamp."""
        return self.__repr__(with_date=False)

    def _key(self):
        """Return the comparison key of a concrete (non-latest) version."""
        return (self.major, self.minor, self.patch)

    def __gt__(self, other):
        if not isinstance(other, UnicodeVersion):
            return NotImplemented
        if self.latest:
            return True
        if other.latest:
            # Nothing concrete is newer than "latest".
            return False
        return self._key() > other._key()

    def __lt__(self, other):
        # ``a < b`` is defined as ``b > a`` so both spellings always agree.
        if not isinstance(other, UnicodeVersion):
            return NotImplemented
        return other.__gt__(self)

    def __eq__(self, other):
        if not isinstance(other, UnicodeVersion):
            return NotImplemented
        if self.latest or other.latest:
            return False
        return self._key() == other._key()


class UnicodeData(object):
    """In-memory tables parsed from the Unicode data files needed for IDNA.

    Constructing an instance loads every file immediately. Files are read
    from ``args.source`` when that is set; otherwise from the cache directory
    when one is configured and already holds the file; otherwise they are
    downloaded (and written to the cache when one is configured).

    Attributes populated by the loaders:
        ucd_data:   code point -> fields 1.. of its UnicodeData.txt record
        ucd_props:  code point -> list of property names (a defaultdict)
        ucd_block:  code point -> block name
        ucd_cf:     code point -> case-folded string ('C' and 'F' entries)
        ucd_hst:    code point -> Hangul syllable type
        ucd_as:     code point -> joining type
        ucd_s:      script name -> set of code points
        ucd_idnamt: code point -> list of IdnaMappingTable.txt fields
        max:        highest code point covered by Blocks.txt
    """

    # "XXXX ; value # comment" or "XXXX..YYYY ; value # comment".
    _PROPERTY_LINE = re.compile(
        r'^(?P<start>[0-9A-F]{4,6})(|\.\.(?P<end>[0-9A-F]{4,6}))\s*;\s*(?P<value>\S+)\s*(|\#.*)$')
    _BLOCK_LINE = re.compile(
        r'^(?P<start>[0-9A-F]{4,6})\.\.(?P<end>[0-9A-F]{4,6})\s*;\s*(?P<block>.*)\s*$')
    _CASEFOLD_LINE = re.compile(
        r'^(?P<cp>[0-9A-F]{4,6})\s*;\s*(?P<type>\S+)\s*;\s*(?P<subst>[0-9A-F\s]+)\s*')
    _UTS46_LINE = re.compile(
        r'^(?P<start>[0-9A-F]{4,6})(|\.\.(?P<end>[0-9A-F]{4,6}))\s*;\s*(?P<fields>[^#]+)')

    def __init__(self, version, cache, args):
        """Load all data files.

        Args:
            version: Unicode version string (``x.y.z`` or ``'latest'``).
            cache: cache directory, or a false value to disable caching.
            args: parsed command-line namespace; only ``args.source`` is read.
        """
        self.version = UnicodeVersion(version)
        self.system_version = UnicodeVersion(unicodedata.unidata_version)
        self.source = args.source
        self.cache = cache
        self.max = 0

        if self.system_version < self.version:
            print('Warning: Character stability not guaranteed as Python Unicode data {}'
                   ' older than requested {}'.format(self.system_version, self.version))

        self._load_unicodedata()
        self._load_proplist()
        self._load_derivedcoreprops()
        self._load_blocks()
        self._load_casefolding()
        self._load_hangulst()
        self._load_arabicshaping()
        self._load_scripts()
        self._load_uts46mapping()

    @staticmethod
    def _span(result):
        """Return the inclusive code point range named by a regex match.

        The match must expose a ``start`` group and an ``end`` group; an
        empty or missing ``end`` means a single code point.
        """
        first = int(result.group('start'), 16)
        last = int(result.group('end'), 16) if result.group('end') else first
        return range(first, last + 1)

    def _property_entries(self, filename):
        """Yield ``(code_points, value)`` for each data line of *filename*."""
        for line in self._ucdfile(filename).splitlines():
            result = self._PROPERTY_LINE.match(line)
            if result:
                yield self._span(result), result.group('value')

    def _append_properties(self, filename):
        """Append every property listed in *filename* to ``self.ucd_props``."""
        for code_points, prop in self._property_entries(filename):
            for i in code_points:
                self.ucd_props[i].append(prop)

    def _load_unicodedata(self):
        """Parse UnicodeData.txt, expanding ``<X, First>``/``<X, Last>`` pairs."""
        self.ucd_data = {}
        range_begin = None
        for line in self._ucdfile('UnicodeData.txt').splitlines():
            if not line.strip():
                continue
            fields = line.split(';')
            value = int(fields[0], 16)
            start_marker = re.match('^<(?P<name>.*?), First>$', fields[1])
            end_marker = re.match('^<(?P<name>.*?), Last>$', fields[1])
            if start_marker:
                range_begin = value
            elif end_marker:
                if range_begin is None:
                    raise ValueError(
                        'UnicodeData.txt: range end without start at {:04X}'.format(value))
                fields[1] = '<{}>'.format(end_marker.group('name'))
                # One record shared by the whole range; nothing mutates it.
                record = fields[1:]
                for i in range(range_begin, value + 1):
                    self.ucd_data[i] = record
                range_begin = None
            else:
                self.ucd_data[value] = fields[1:]

    def _load_proplist(self):
        """Create ``ucd_props`` and fill it from PropList.txt."""
        self.ucd_props = collections.defaultdict(list)
        self._append_properties('PropList.txt')

    def _load_derivedcoreprops(self):
        """Add the properties from DerivedCoreProperties.txt to ``ucd_props``."""
        if not hasattr(self, 'ucd_props'):
            # Do not depend on _load_proplist() having run first.
            self.ucd_props = collections.defaultdict(list)
        self._append_properties('DerivedCoreProperties.txt')

    def _load_blocks(self):
        """Parse Blocks.txt and track the highest code point it covers."""
        self.ucd_block = {}
        for line in self._ucdfile('Blocks.txt').splitlines():
            result = self._BLOCK_LINE.match(line)
            if not result:
                continue
            code_points = self._span(result)
            self.ucd_block.update(dict.fromkeys(code_points, result.group('block')))
            if code_points:
                self.max = max(self.max, code_points[-1])

    def _load_casefolding(self):
        """Parse the common ('C') and full ('F') entries of CaseFolding.txt."""
        self.ucd_cf = {}
        for line in self._ucdfile('CaseFolding.txt').splitlines():
            result = self._CASEFOLD_LINE.match(line)
            if result and result.group('type') in ('C', 'F'):
                # split() rather than split(' ') so stray whitespace around
                # the code points cannot produce empty tokens.
                self.ucd_cf[int(result.group('cp'), 16)] = ''.join(
                    chr(int(x, 16)) for x in result.group('subst').split())

    def _load_hangulst(self):
        """Parse HangulSyllableType.txt (ranges and single code points)."""
        self.ucd_hst = {}
        for code_points, syllable_type in self._property_entries('HangulSyllableType.txt'):
            for i in code_points:
                self.ucd_hst[i] = syllable_type

    def _load_arabicshaping(self):
        """Parse joining types from extracted/DerivedJoiningType.txt."""
        self.ucd_as = {}
        for code_points, joining_type in self._property_entries(
                'extracted/DerivedJoiningType.txt'):
            for i in code_points:
                self.ucd_as[i] = joining_type

    def _load_scripts(self):
        """Parse Scripts.txt into a script name -> set of code points map."""
        self.ucd_s = {}
        for code_points, script in self._property_entries('Scripts.txt'):
            self.ucd_s.setdefault(script, set()).update(code_points)

    def _load_uts46mapping(self):
        """Parse IdnaMappingTable.txt into per-code-point field lists."""
        self.ucd_idnamt = {}
        text = self._ucdfile('IdnaMappingTable.txt', urlbase=UTS46_URL)
        for line in text.splitlines():
            result = self._UTS46_LINE.match(line)
            if not result:
                continue
            fields = [x.strip() for x in result.group('fields').split(';')]
            for i in self._span(result):
                self.ucd_idnamt[i] = fields

    def _ucdfile(self, filename, urlbase=UCD_URL):
        """Return the text of a data file from source dir, cache or network.

        Args:
            filename: file name relative to the data directory.
            urlbase: URL template with ``{version}`` and ``{filename}``.
        """
        if self.source:
            with open('{}/{}'.format(self.source, filename), encoding='utf-8') as f:
                return f.read()

        cache_file = None
        if self.cache:
            cache_file = os.path.expanduser('{}/{}/{}'.format(
                self.cache, self.version.tag, filename))
            if os.path.isfile(cache_file):
                # The cache is written as UTF-8 below, so read it back as
                # UTF-8 instead of whatever the locale default happens to be.
                with open(cache_file, encoding='utf-8') as f:
                    return f.read()

        version_path = self.version.tag
        if version_path == 'latest':
            # Only the UCD tree keeps its newest files under "UCD/latest";
            # other templates (the IDNA table) use a plain "latest" segment.
            version_path = 'UCD/latest' if urlbase == UCD_URL else 'latest'
        url = urlbase.format(
            version=version_path,
            filename=filename,
        )
        with urlopen(url) as response:
            content = response.read().decode('utf-8')

        if cache_file:
            os.makedirs(os.path.dirname(cache_file), exist_ok=True)
            with open(cache_file, 'wb') as f:
                f.write(content.encode('utf-8'))

        return str(content)

    def codepoints(self):
        """Yield a CodePoint for every value from 0 through ``self.max``."""
        for i in range(0, self.max + 1):
            yield CodePoint(i, ucdata=self)


class CodePoint:
    """A single code point together with the tables needed to classify it.

    ``ucdata`` is expected to expose the lookup tables read below
    (``ucd_data``, ``ucd_props``, ``ucd_cf``, ``ucd_block``, ``ucd_hst``,
    ``ucd_as`` and ``ucd_idnamt``). All properties are read-only queries and
    never modify those tables.
    """

    def __init__(self, value=None, ucdata=None):
        self.value = value
        self.ucdata = ucdata

    @property
    def _props(self):
        """Property names recorded for this code point (possibly empty).

        Uses ``.get`` instead of indexing: ``ucd_props`` may be a
        ``defaultdict``, and indexing it would insert an empty list for
        every code point that is merely queried.
        """
        return self.ucdata.ucd_props.get(self.value, ())

    def _casefold(self, s):
        """Case-fold *s* with the loaded CaseFolding data."""
        folding = self.ucdata.ucd_cf
        return ''.join(folding.get(ord(c), c) for c in s)

    @property
    def exception_value(self):
        """Status from the module-level ``exceptions`` table, else False."""
        return exceptions.get(self.value, False)

    @property
    def compat_value(self):
        """Status from ``backwardscompatible``, else False."""
        return backwardscompatible.get(self.value, False)

    @property
    def name(self):
        """Character name, or ``<noncharacter>`` / ``<reserved>``."""
        if self.value in self.ucdata.ucd_data:
            return self.ucdata.ucd_data[self.value][0]
        elif 'Noncharacter_Code_Point' in self._props:
            return '<noncharacter>'
        else:
            return '<reserved>'

    @property
    def general_category(self):
        """General category from UnicodeData, or None when there is no record."""
        return self.ucdata.ucd_data.get(self.value, [None, None])[1]

    @property
    def unassigned(self):
        """True when the code point has no record and is not a noncharacter."""
        return not ('Noncharacter_Code_Point' in self._props or
                    self.value in self.ucdata.ucd_data)

    @property
    def ldh(self):
        """True for ASCII hyphen-minus, digits and lowercase letters."""
        if self.value == 0x002d or \
           self.value in range(0x0030, 0x0039+1) or \
           self.value in range(0x0061, 0x007a+1):
            return True
        return False

    @property
    def join_control(self):
        return 'Join_Control' in self._props

    @property
    def joining_type(self):
        return self.ucdata.ucd_as.get(self.value, None)

    @property
    def char(self):
        return chr(self.value)

    @property
    def nfkc_cf(self):
        """NFKC(casefold(NFKC(char))) using the loaded case-folding table."""
        return unicodedata.normalize('NFKC',
                                     self._casefold(unicodedata.normalize('NFKC', self.char)))

    @property
    def unstable(self):
        """True when normalisation plus case folding changes the character."""
        return self.char != self.nfkc_cf

    @property
    def in_ignorableproperties(self):
        props = self._props
        for prop in ['Default_Ignorable_Code_Point', 'White_Space', 'Noncharacter_Code_Point']:
            if prop in props:
                return True
        return False

    @property
    def in_ignorableblocks(self):
        return self.ucdata.ucd_block.get(self.value) in (
            'Combining Diacritical Marks for Symbols', 'Musical Symbols',
            'Ancient Greek Musical Notation'
        )

    @property
    def oldhanguljamo(self):
        return self.ucdata.ucd_hst.get(self.value) in ('L', 'V', 'T')

    @property
    def in_lettersdigits(self):
        return self.general_category in ('Ll', 'Lu', 'Lo', 'Nd', 'Lm', 'Mn', 'Mc')

    @property
    def idna2008_status(self):
        """Classify the code point; the first matching rule below wins."""
        if self.exception_value:
            return self.exception_value
        elif self.compat_value:
            return self.compat_value
        elif self.unassigned:
            return 'UNASSIGNED'
        elif self.ldh:
            return 'PVALID'
        elif self.join_control:
            return 'CONTEXTJ'
        elif self.unstable:
            return 'DISALLOWED'
        elif self.in_ignorableproperties:
            return 'DISALLOWED'
        elif self.in_ignorableblocks:
            return 'DISALLOWED'
        elif self.oldhanguljamo:
            return 'DISALLOWED'
        elif self.in_lettersdigits:
            return 'PVALID'
        else:
            return 'DISALLOWED'

    @property
    def uts46_data(self):
        """Field list from the IDNA mapping table, or None when absent."""
        return self.ucdata.ucd_idnamt.get(self.value, None)

    @property
    def uts46_status(self):
        """Space-joined mapping-table fields, or None when there is no entry."""
        fields = self.uts46_data
        if fields is None:
            return None
        return ' '.join(fields)


def diagnose_codepoint(codepoint, args, ucdata):
    """Print each input of the IDNA 2008 derivation for one code point.

    Args:
        codepoint: integer code point to describe.
        args: parsed command-line namespace (not read here).
        ucdata: loaded UnicodeData instance.
    """
    cp = CodePoint(codepoint, ucdata=ucdata)

    # ``or`` rather than a .get() default: the table may already hold an
    # empty list for this code point, which should still print as "None".
    properties = ucdata.ucd_props.get(codepoint) or ['None']
    # A code point missing from the mapping table has no fields to join.
    uts46_fields = cp.uts46_data
    uts46_status = ' '.join(uts46_fields) if uts46_fields else None

    print('U+{:04X}:'.format(codepoint))
    print('   Name:             {}'.format(cp.name))
    print('1  Exceptions:       {}'.format(exceptions.get(codepoint, False)))
    print('2  Backwards Compat: {}'.format(backwardscompatible.get(codepoint, False)))
    print('3  Unassigned:       {}'.format(cp.unassigned))
    print('4  LDH:              {}'.format(cp.ldh))
    print('   Properties:       {}'.format(' '.join(sorted(properties))))
    print('5  .Join Control:    {}'.format(cp.join_control))
    print('   NFKC CF:          {}'.format(' '.join(['U+{:04X}'.format(ord(x)) for x in cp.nfkc_cf])))
    print('6  .Unstable:        {}'.format(cp.unstable))
    print('7  .Ignorable Prop:  {}'.format(cp.in_ignorableproperties))
    print('   Block:            {}'.format(ucdata.ucd_block.get(codepoint, None)))
    print('8  .Ignorable Block: {}'.format(cp.in_ignorableblocks))
    print('   Hangul Syll Type: {}'.format(ucdata.ucd_hst.get(codepoint, None)))
    print('9  .Old Hangul Jamo: {}'.format(cp.oldhanguljamo))
    print('   General Category: {}'.format(cp.general_category))
    print('10 .Letters Digits:  {}'.format(cp.in_lettersdigits))
    print('== IDNA 2008:        {}'.format(cp.idna2008_status))
    print('== UTS 46:           {}'.format(uts46_status))
    print('(Unicode {} [sys:{}])'.format(ucdata.version, ucdata.system_version))

def ucdrange(start, end):
    """Describe a run of code points as ``(values, description)`` strings.

    A run whose endpoints compare equal is rendered as a single value and
    name; otherwise both parts use the ``first..last`` form.
    """
    single = start == end
    if single:
        values = '{:04X}'.format(start.value)
        description = start.name
    else:
        values = '{:04X}..{:04X}'.format(start.value, end.value)
        description = '{}..{}'.format(start.name, end.name)
    return (values, description)

def upper_hex(value):
    """Format *value* like ``hex()`` but with the digits in upper case.

    The first two characters of the ``hex()`` text (normally ``0x``) are
    kept as they are; everything after them is upper-cased.
    """
    literal = hex(value)
    prefix, digits = literal[:2], literal[2:]
    return ''.join((prefix, digits.upper()))

def optimised_list(d):
    """Yield source lines for a tuple literal of the ranges covering *d*.

    *d* is passed to ``intranges_from_list``. A single resulting value is
    emitted as a one-line tuple; otherwise one value is emitted per line
    between an opening and a closing line.
    """
    ranges = intranges_from_list(d)
    if len(ranges) == 1:
        # Respect ruff format style
        (only,) = ranges
        yield '({},),'.format(upper_hex(only))
        return

    yield '('
    for item in ranges:
        yield '        {},'.format(upper_hex(item))
    yield '    ),'

def make_table(args, ucdata):
    """Write or print a table of consecutive code points sharing a status.

    Code points are grouped into maximal runs with the same IDNA 2008
    status. With ``args.dir`` set, rows are written ASCII-encoded to
    ``idna-table-<version>.txt`` in that directory; otherwise they are
    printed to standard output.
    """
    table_data = []
    run = []            # code points of the run currently being collected
    run_status = None

    for cp in ucdata.codepoints():
        status = cp.idna2008_status
        if run and status != run_status:
            (values, description) = ucdrange(run[0], run[-1])
            table_data.append([values, run_status, description])
            run = []
        run_status = status
        run.append(cp)
    if run:
        # Flush the final run (skipped only if no code points were produced).
        (values, description) = ucdrange(run[0], run[-1])
        table_data.append([values, run_status, description])

    if args.dir:

        # ``with`` guarantees the file is closed even when a write fails.
        with open('{}/idna-table-{}.txt'.format(args.dir, ucdata.version), 'wb') as f:
            for row in table_data:
                f.write('{:12}; {:12}# {:.44}\n'.format(*row).encode('ascii'))

    else:

        for row in table_data:
            print('{:12}; {:12}# {:.44}'.format(*row))

def idna_libdata(ucdata):
    """Yield the lines of the generated ``idnadata.py`` module.

    The output contains the data version, the code points of each script in
    ``SCRIPT_WHITELIST``, the joining type of every code point that has one,
    and the PVALID / CONTEXTJ / CONTEXTO code point classes.
    """

    def entry(key, members):
        # The dictionary key is prepended to the first emitted line only.
        for position, line in enumerate(optimised_list(members)):
            lead = '    "{}": '.format(key) if position == 0 else ''
            yield lead + line

    yield '# This file is automatically generated by tools/idna-data\n'
    yield '__version__ = "{}"\n'.format(ucdata.version)

    # Script classifications are used by some CONTEXTO rules in RFC 5891
    yield 'scripts = {'
    for script in SCRIPT_WHITELIST:
        yield from entry(script, ucdata.ucd_s[script])
    yield '}'

    # Joining types are used by CONTEXTJ rule A.1
    yield 'joining_types = {'
    for cp in ucdata.codepoints():
        joining = cp.joining_type
        if joining:
            yield '    0x{:X}: {},'.format(cp.value, ord(joining))
    yield '}'

    # These are the classification of codepoints into PVALID, CONTEXTO, CONTEXTJ, etc.
    yield 'codepoint_classes = {'
    classes = {}
    for cp in ucdata.codepoints():
        status = cp.idna2008_status
        if status not in ('UNASSIGNED', 'DISALLOWED'):
            classes.setdefault(status, set()).add(cp.value)
    for status in ['PVALID', 'CONTEXTJ', 'CONTEXTO']:
        yield from entry(status, classes[status])
    yield '}'

def uts46_ranges(ucdata):
    """Yield tuple-literal source text for each UTS 46 mapping row.

    A row is skipped when it repeats the previous row's status and mapping,
    except for code points up to 255, which are always emitted.
    """
    previous = (None, None)
    for cp in ucdata.codepoints():
        fields = cp.uts46_data
        if not fields:
            continue

        status, has_mapping = UTS46_STATUSES[fields[0]]
        mapping = None
        if has_mapping:
            chars = [chr(int(codepoint, 16)) for codepoint in fields[1].split()]
            # Backslashes are doubled because the text is emitted inside a
            # string literal.
            mapping = ''.join(chars).replace('\\', '\\\\')

        current = (status, mapping)
        if cp.value > 255 and current == previous:
            continue
        previous = current

        if mapping is None:
            yield '(0x{:X}, "{}")'.format(cp.value, status)
        elif '"' in mapping:
            # Switch to single quotes when the mapping holds a double quote.
            yield '(0x{:X}, "{}", \'{}\')'.format(cp.value, status, mapping)
        else:
            yield '(0x{:X}, "{}", "{}")'.format(cp.value, status, mapping)

def uts46_libdata(ucdata):
    """Yield the lines of the generated ``uts46data.py`` module.

    Rows are split across ``_seg_N()`` functions holding
    ``UTS46_SEGMENT_SIZE`` rows each, followed by a ``uts46data`` tuple that
    concatenates every segment.
    """
    preamble = (
        '# This file is automatically generated by tools/idna-data',
        '# vim: set fileencoding=utf-8 :\n',
        'from typing import List, Tuple, Union\n',
        '"""IDNA Mapping Table from UTS46."""\n\n',
    )
    yield from preamble

    yield '__version__ = "{}"\n'.format(ucdata.version)

    idx = -1  # stays -1 when there are no rows at all
    for idx, row in enumerate(uts46_ranges(ucdata)):
        segment, offset = divmod(idx, UTS46_SEGMENT_SIZE)
        if offset == 0:
            if segment > 0:
                # Close the previous segment before opening the next one.
                yield '    ]\n'
            yield '\ndef _seg_{}() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]:\n    return ['.format(segment)
        yield '        {},'.format(row)
    yield '    ]\n'

    last_segment = idx // UTS46_SEGMENT_SIZE
    yield '\nuts46data = tuple('
    yield '    _seg_0()'
    for i in range(1, last_segment + 1):
        yield '    + _seg_{}()'.format(i)
    yield ')  # type: Tuple[Union[Tuple[int, str], Tuple[int, str, str]], ...]'

def make_libdata(args, ucdata):
    """Write ``idnadata.py`` and ``uts46data.py`` into the output directory.

    The directory is ``args.dir``, or the current directory when that is not
    set. Each generated line is written UTF-8 encoded with a trailing newline.
    """
    dest_dir = args.dir or '.'
    outputs = (
        ('idnadata.py', idna_libdata),
        ('uts46data.py', uts46_libdata),
    )
    for filename, generate in outputs:
        target_filename = os.path.join(dest_dir, filename)
        with open(target_filename, 'wb') as target:
            for line in generate(ucdata):
                target.write((line + '\n').encode('utf-8'))

def arg_error(message, parser):
    """Print the parser's usage and an error line, then exit with status 2."""
    parser.print_usage()
    program = sys.argv[0]
    print('{}: error: {}'.format(program, message))
    raise SystemExit(2)

def main():
    """Command-line entry point.

    The positional ``action`` is ``make-table``, ``make-libdata`` or a code
    point such as ``U+0061``. An unrecognised action, or passing both
    ``--cache`` and ``--no-cache``, is reported through ``arg_error`` before
    any Unicode data is loaded.
    """
    parser = argparse.ArgumentParser(description='Determine IDNA code-point validity data')
    # The positional is required, so it takes no default. The help text names
    # the actions exactly as they are matched below.
    parser.add_argument('action', type=str,
                        help='Task to perform (make-libdata, make-table, <codepoint>)')

    parser.add_argument('--version', type=str, default='preferred',
                        help='Unicode version to use (preferred, latest, <x.y.z>)')
    parser.add_argument('--source', type=str, default=None,
                        help='Where to fetch Unicode data (file path)')
    parser.add_argument('--dir', type=str, default=None, help='Where to export the output')
    parser.add_argument('--cache', type=str, default=None, help='Where to cache Unicode data')
    parser.add_argument('--no-cache', action='store_true', help='Don\'t cache Unicode data')

    # These groups carry no arguments; they only add descriptive sections
    # to the help output.
    parser.add_argument_group('make-libdata', 'Make module data for Python IDNA library')
    parser.add_argument_group('make-table', 'Make IANA-style reference table')
    parser.add_argument_group('codepoint',
                              'Display related data for given codepoint (e.g. U+0061)')

    args = parser.parse_args()

    # Validate the action up front so a typo does not cost a full data load.
    codepoint_match = None
    if args.action not in ('make-table', 'make-libdata'):
        codepoint_match = re.match(r'(?i)^(U\+|)(?P<cp>[0-9A-F]{4,6})$', args.action)
        if not codepoint_match:
            arg_error('Don\'t recognize action or codepoint value', parser)

    if args.version == 'preferred':
        target_version = PREFERRED_VERSION
    else:
        target_version = args.version

    if args.cache and args.no_cache:
        arg_error('I can\'t both --cache and --no-cache', parser)
    cache = args.cache or DEFAULT_CACHE_DIR
    if args.no_cache:
        cache = None

    ucdata = UnicodeData(target_version, cache, args)

    if args.action == 'make-table':
        make_table(args, ucdata)
    elif args.action == 'make-libdata':
        make_libdata(args, ucdata)
    else:
        value = int(codepoint_match.group('cp'), 16)
        diagnose_codepoint(value, args, ucdata)
        sys.exit(0)
        

if __name__ == '__main__':
    main()

Youez - 2016 - github.com/yon3zu
LinuXploit