
    ThYJ                         d Z ddlZddlZddlmZmZmZ  G d de      Zd Z	 G d d	e      Z
 G d
 de      ZddZedk(  rddlZ ej                          yy)u  Unicode utility functions

>>> from .import unicode_util
>>> from .util import u
>>> u1 = '1'  # DIGIT ONE
>>> u2 = u('a')  # LATIN SMALL LETTER A
>>> u3 = u('２')  # FULLWIDTH DIGIT TWO
>>> u4 = u('Ā')  # LATIN CAPITAL LETTER A WITH MACRON
>>> unicode_util.Category.get(u1) == u('Nd')
True
>>> unicode_util.Category.get(u2) == u('Ll')
True
>>> unicode_util.Category.get(u3) == u('Nd')
True
>>> unicode_util.Category.get(u4) == u('Lu')
True
>>> unicode_util.Category.get(u2) == unicode_util.Category.LOWERCASE_LETTER
True
>>> try:
...     beyond_bmp = u('𐄀')  # AEGEAN WORD SEPARATOR LINE
... except Exception:
...     beyond_bmp = u('')
>>> if len(beyond_bmp) == 1:  # We have a UCS4 build of Python
...     cat_po = unicode_util.Category.get(beyond_bmp)
... else:  # UCS2 build of Python; no non-BMP chars available
...     cat_po = unicode_util.Category.OTHER_PUNCTUATION
>>> cat_po == u('Po')
True
>>> unicode_util.is_letter(u1)
False
>>> unicode_util.is_letter(u2)
True
>>> unicode_util.is_letter(u3)
False
>>> unicode_util.is_letter(u4)
True
>>> b1 = unicode_util.Block.get(u1)
>>> str(b1)
'Block[0000, 007f]'
>>> b1 == unicode_util.Block.BASIC_LATIN
True
>>> b1 == [0x0000, 0x0075]
False
>>> b2 = unicode_util.Block.get(u2)
>>> b2 == unicode_util.Block.BASIC_LATIN
True
>>> b3 = unicode_util.Block.get(u3)
>>> b3 != unicode_util.Block.BASIC_LATIN
True
>>> b3 == unicode_util.Block.HALFWIDTH_AND_FULLWIDTH_FORMS
True
>>> b4 = unicode_util.Block.get(u4)
>>> b4 == unicode_util.Block.LATIN_EXTENDED_A
True
>>> unicode_util.Block.get(u('ࡠ')) == unicode_util.Block.UNKNOWN
True
>>> try:
...     unknown_block = u('𓐰')
... except Exception:
...     unknown_block = u('')
>>> if len(unknown_block) == 1:  # We have a UCS4 build of Python
...     unicode_util.Block.get(u('𓐰')) == unicode_util.Block.UNKNOWN
... else:  # UCS2 build of Python; no unknown characters available
...     True
True
>>> unicode_util.digit(u1)
1
>>> unicode_util.digit(u2, -1)
-1
>>> unicode_util.digit(u3, -1)
2
>>> str(hash(b3))  # doctest: +ELLIPSIS
'...'
    N   )UnicodeMixinunicoduc                   p   e Zd ZdZ ed      Z ed      Z ed      Z ed      Z ed      Z	 ed      Z
 ed      Z ed	      Z ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z  ed      Z! ed      Z" ed       Z# ed!      Z$ ed"      Z% ed#      Z& ed$      Z' ed%      Z( ed&      Z)e*d'        Z+y())CategoryzaGeneral category of a Unicode character.

    See http://www.unicode.org/reports/tr18/#CategoriesLLuLlLtLmLoMMnMcMeNNdNlNoSSmScSkSoPPcPdPsPePiPfPoZZsZlZpCCcCfCsCoCnc                 T    t        |      }t        t        j                  |            S )zTReturn the general category code (as Unicode string) for the given Unicode character)r   unicodedatacategory)clsuni_chars     Y/opt/server/standalone/crm/venv/lib/python3.12/site-packages/phonenumbers/unicode_util.pygetzCategory.get{   s$     (#k**8455    N),__name__
__module____qualname____doc__r   LETTERUPPERCASE_LETTERLOWERCASE_LETTERTITLECASE_LETTERMODIFIER_LETTEROTHER_LETTERMARKNON_SPACING_MARKSPACING_COMBINING_MARKENCLOSING_MARKNUMBERDECIMAL_DIGIT_NUMBERLETTER_NUMBEROTHER_NUMBERSYMBOLMATH_SYMBOLCURRENCY_SYMBOLMODIFIER_SYMBOLOTHER_SYMBOLPUNCTUATIONCONNECTOR_PUNCTUATIONDASH_PUNCTUATIONOPEN_PUNCTUATIONCLOSE_PUNCTUATIONINITIAL_PUNCTUATIONFINAL_PUNCTUATIONOTHER_PUNCTUATION	SEPARATORSPACE_SEPARATORLINE_SEPARATORPARAGRAPH_SEPARATOROTHERCONTROLFORMAT	SURROGATEPRIVATE_USENOT_ASSIGNEDclassmethodr4    r5   r3   r   r   Q   sv   ; sVFwwwgOT7LS6DwtWtWNsVFT7dGMT7LsVFD'KgOgOT7LC&KdGww$D'$$#IgOtWND'cFEgGtWF$ID'KT7L6 6r5   r   c                     t         j                  |       }|t         j                  k(  xsR |t         j                  k(  xs= |t         j                  k(  xs( |t         j
                  k(  xs |t         j                  k(  S )zADetermine whether the given Unicode character is a Unicode letter)r   r4   r;   r<   r=   r>   r?   )r2   r0   s     r3   	is_letterrb      ss    ||H%H111 .111.111. 000. ---	/r5   c                   0    e Zd ZdZddZd Zd Zd Zd Zy)	_BlockRangez?Describe the range of characters encompassed by a Unicode blockNc                 0    || _         || _        || ||<   y y N)startend)selfrg   rh   regdicts       r3   __init__z_BlockRange.__init__   s$    
!GEN r5   c                     t        |t              st        S | j                  |j                  k(  xr | j                  |j                  k(  S rf   )
isinstancerd   NotImplementedrg   rh   ri   others     r3   __eq__z_BlockRange.__eq__   s7    %-!!

ekk)Cdhh%)).CDr5   c                     | |k(   S rf   r`   ro   s     r3   __ne__z_BlockRange.__ne__   s    5=  r5   c                 D    t        | j                  | j                  f      S rf   )hashrg   rh   ri   s    r3   __hash__z_BlockRange.__hash__   s    TZZ*++r5   c                 J    t        d      | j                  | j                  fz  S )NzBlock[%04x, %04x])r   rg   rh   rv   s    r3   __unicode__z_BlockRange.__unicode__   s     )*djj$((-CCCr5   rf   )	r6   r7   r8   r9   rk   rq   rs   rw   ry   r`   r5   r3   rd   rd      s     I"E
!,Dr5   rd   c                      e Zd ZdZi ZdZ edde      Z edde      Z edde      Z	 ed	d
e      Z
 edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edd e      Z ed!d"e      Z ed#d$e      Z ed%d&e      Z ed'd(e      Z ed)d*e      Z ed+d,e      Z ed-d.e      Z ed/d0e      Z ed1d2e      Z ed3d4e      Z ed5d6e      Z  ed7d8e      Z! ed9d:e      Z" ed;d<e      Z# ed=d>e      Z$ ed?d@e      Z% edAdBe      Z& edCdDe      Z' edEdFe      Z( edGdHe      Z) edIdJe      Z* edKdLe      Z+ edMdNe      Z, edOdPe      Z- edQdRe      Z. edSdTe      Z/ edUdVe      Z0 edWdXe      Z1 edYdZe      Z2 ed[d\e      Z3 ed]d^e      Z4 ed_d`e      Z5 edadbe      Z6 edcdde      Z7 ededfe      Z8 edgdhe      Z9 edidje      Z: edkdle      Z; edmdne      Z< edodpe      Z= edqdre      Z> edsdte      Z? edudve      Z@ edwdxe      ZA edydze      ZB ed{d|e      ZC ed}d~e      ZD edde      ZE edde      ZF edde      ZG edde      ZH edde      ZI edde      ZJ edde      ZK edde      ZL edde      ZM edde      ZN edde      ZO edde      ZP edde      ZQ edde      ZR edde      ZS edde      ZT edde      ZU edde      ZV edde      ZW edde      ZX edde      ZY edde      ZZ edde      Z[ edde      Z\ edde      Z] edde      Z^ edde      Z_ edde      Z` edde      Za edde      Zb edde      Zc edde      Zd edde      Ze edde      Zf edde      Zg edde      Zh edde      Zi edde      Zj edde      Zk edde      Zl edde      Zm edde      Zn edde      Zo edde      Zp edde      Zq edde      Zr edde      Zs edde      Zt edde      Zu edde      Zv edde      Zw edde      Zx edde      Zy edde      Zz edde      Z{ edde      Z| edde      Z} edde      Z~ edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edd e      Z edde      Z edde      Z edde      Z edde      Z ed	d
e      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edd e      Z ed!d"e      Z ed#d$e      Z ed%d&e      Z ed'd(e      Z ed)d*e      Z ed+d,e      Z ed-d.e      Z ed/d0e      Z ed1d2e      Z ed3d4e      Z ed5d6e      Z ed7d8e      Z ed9d:e      Z ed;d<e      Z ed=d>e      Z ed?d@e      Z edAdBe      Z edCdDe      Z edEdFe      Z edGdHe      Z edIdJe      Z edKdLe      Z edMdNe      Z edOdPe      Z edQdRe      Z edSdTe      Z edUdVe      Z edWdXe      Z edYdZe      Z ed[d\e      Z ed]d^e      Z ed_d`e      Z edadbe      Z edcdde      Z ededfe      Z edgdhe      Z edidje      Z edkdle      Z edmdne      Z edodpe      Z edqdre      Z edsdte      Z edudve      Z edwdxe      Z edydze      Z ed{d|e      Z ed}d~e      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edde      Z edd      Zeِd        Zy(  Blockz*Description of the possible Unicode blocksNr               i  i  iO  iP  i  i  i  i   io  ip  i  i   i  i   i/  i0  i  i  i  i   i  i   iO  iP  i  i  i  i  i  i   i?  i@  i_  i 	  i	  i	  i	  i 
  i
  i
  i
  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i  i  i   i  i   i  i  i  i  i  i   i  i  i  i  i  i   i  i   i?  i@  i_  i`  i  i  i  i   i  i  i  i   iO  iP  i  i  i  i  i  i   i  i   i  i   i  i  i  i  i  i   iO  iP  i  i  i  i   i  i  i  i  i  i   i  i   i  i    io   ip   i   i   i   i   i   i !  iO!  iP!  i!  i!  i!  i "  i"  i #  i#  i $  i?$  i@$  i_$  i`$  i$  i %  i%  i%  i%  i%  i%  i &  i&  i '  i'  i'  i'  i'  i'  i (  i(  i )  i)  i)  i)  i *  i*  i +  i+  i ,  i_,  i`,  i,  i,  i,  i -  i/-  i0-  i-  i-  i-  i-  i-  i .  i.  i.  i.  i /  i/  i/  i/  i 0  i?0  i@0  i0  i0  i0  i 1  i/1  i01  i1  i1  i1  i1  i1  i1  i1  i1  i1  i 2  i2  i 3  i3  i 4  iM  iM  iM  i N  i  i   i  i  iϤ  iФ  i  i   i?  i@  i  i  i  i   i  i   i  i   i/  i0  i?  i@  i  i  iߨ  i  i  i   i/  i0  i_  i`  i  i  iߩ  i   i_  i`  i  i  iߪ  i   i/  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i   i  i   iO  iP  i  i   i  i  i  i   i/  i0  iO  iP  io  ip  i  i   i  i  i  i   i  i  i  i  i? i@ i i i i i i i i i i  i/ i0 iO i i i i i  iO iP i i i i  i? i@ i_ i 	 i	 i 	 i?	 i 
 i_
 i`
 i
 i  i? i@ i_ i` i i  iO i` i i  i i i i   i# i $ i$ i 0 i/4 i h i?j i  i i  i i  i i  iO i  i_ i` i i  i i  i/ i0 i i i i  i i  i i  i i  iO i i i  i i   iߦ i  i? i@ i i  i i   i  i  i i   i i   i c                    t        |      }t        |      }t        j                  0t	        t        j
                  j                               t        _        t        j                  t        j                  |      }|dkD  r|t        j
                  t        j                  |dz
        j                  k\  r[|t        j
                  t        j                  |dz
        j                  k  r't        j
                  t        j                  |dz
        S |t        t        j
                        k  r|t        j
                  t        j                  |      j                  k\  rU|t        j
                  t        j                  |      j                  k  r$t        j
                  t        j                  |      S t        j                  S )z7Return the Unicode block of the given Unicode characterr   r   )r   ordr{   _RANGE_KEYSsorted_RANGESkeysbisectbisect_leftrg   rh   lenUNKNOWN)r1   r2   
code_pointidxs       r3   r4   z	Block.get}  s?    (#]
$ &u}}'9'9'; <E  !2!2J?!G%--(9(9#'(BCIII%--(9(9#'(BCGGG==!2!237!;<<C&&EMM%*;*;C*@AGGGEMM%*;*;C*@AEEE==!2!23!788== r5   )r6   r7   r8   r9   r   r   rd   BASIC_LATINLATIN_1_SUPPLEMENTLATIN_EXTENDED_ALATIN_EXTENDED_BIPA_EXTENSIONSSPACING_MODIFIER_LETTERSCOMBINING_DIACRITICAL_MARKSGREEK_AND_COPTICCYRILLICCYRILLIC_SUPPLEMENTARMENIANHEBREWARABICSYRIACARABIC_SUPPLEMENTTHAANANKO	SAMARITANMANDAIC
DEVANAGARIBENGALIGURMUKHIGUJARATIORIYATAMILTELUGUKANNADA	MALAYALAMSINHALATHAILAOTIBETANMYANMARGEORGIANHANGUL_JAMOETHIOPICETHIOPIC_SUPPLEMENTCHEROKEE%UNIFIED_CANADIAN_ABORIGINAL_SYLLABICSOGHAMRUNICTAGALOGHANUNOOBUHIDTAGBANWAKHMER	MONGOLIAN.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDEDLIMBUTAI_LENEW_TAI_LUEKHMER_SYMBOLSBUGINESETAI_THAMBALINESE	SUNDANESEBATAKLEPCHAOL_CHIKIVEDIC_EXTENSIONSPHONETIC_EXTENSIONSPHONETIC_EXTENSIONS_SUPPLEMENT&COMBINING_DIACRITICAL_MARKS_SUPPLEMENTLATIN_EXTENDED_ADDITIONALGREEK_EXTENDEDGENERAL_PUNCTUATIONSUPERSCRIPTS_AND_SUBSCRIPTSCURRENCY_SYMBOLS'COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLSLETTERLIKE_SYMBOLSNUMBER_FORMSARROWSMATHEMATICAL_OPERATORSMISCELLANEOUS_TECHNICALCONTROL_PICTURESOPTICAL_CHARACTER_RECOGNITIONENCLOSED_ALPHANUMERICSBOX_DRAWINGBLOCK_ELEMENTSGEOMETRIC_SHAPESMISCELLANEOUS_SYMBOLSDINGBATS$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_ASUPPLEMENTAL_ARROWS_ABRAILLE_PATTERNSSUPPLEMENTAL_ARROWS_B$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B#SUPPLEMENTAL_MATHEMATICAL_OPERATORS MISCELLANEOUS_SYMBOLS_AND_ARROWS
GLAGOLITICLATIN_EXTENDED_CCOPTICGEORGIAN_SUPPLEMENTTIFINAGHETHIOPIC_EXTENDEDCYRILLIC_EXTENDED_ASUPPLEMENTAL_PUNCTUATIONCJK_RADICALS_SUPPLEMENTKANGXI_RADICALS"IDEOGRAPHIC_DESCRIPTION_CHARACTERSCJK_SYMBOLS_AND_PUNCTUATIONHIRAGANAKATAKANABOPOMOFOHANGUL_COMPATIBILITY_JAMOKANBUNBOPOMOFO_EXTENDEDCJK_STROKESKATAKANA_PHONETIC_EXTENSIONSENCLOSED_CJK_LETTERS_AND_MONTHSCJK_COMPATIBILITY"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_AYIJING_HEXAGRAM_SYMBOLSCJK_UNIFIED_IDEOGRAPHSYI_SYLLABLESYI_RADICALSLISUVAICYRILLIC_EXTENDED_BBAMUMMODIFIER_TONE_LETTERSLATIN_EXTENDED_DSYLOTI_NAGRICOMMON_INDIC_NUMBER_FORMSPHAGS_PA
SAURASHTRADEVANAGARI_EXTENDEDKAYAH_LIREJANGHANGUL_JAMO_EXTENDED_AJAVANESECHAMMYANMAR_EXTENDED_ATAI_VIETETHIOPIC_EXTENDED_AMEETEI_MAYEKHANGUL_SYLLABLESHANGUL_JAMO_EXTENDED_BHIGH_SURROGATESHIGH_PRIVATE_USE_SURROGATESLOW_SURROGATESPRIVATE_USE_AREACJK_COMPATIBILITY_IDEOGRAPHSALPHABETIC_PRESENTATION_FORMSARABIC_PRESENTATION_FORMS_AVARIATION_SELECTORSVERTICAL_FORMSCOMBINING_HALF_MARKSCJK_COMPATIBILITY_FORMSSMALL_FORM_VARIANTSARABIC_PRESENTATION_FORMS_BHALFWIDTH_AND_FULLWIDTH_FORMSSPECIALSLINEAR_B_SYLLABARYLINEAR_B_IDEOGRAMSAEGEAN_NUMBERSANCIENT_GREEK_NUMBERSANCIENT_SYMBOLSPHAISTOS_DISCLYCIANCARIAN
OLD_ITALICGOTHICUGARITICOLD_PERSIANDESERETSHAVIANOSMANYACYPRIOT_SYLLABARYIMPERIAL_ARAMAIC
PHOENICIANLYDIAN
KHAROSHTHIOLD_SOUTH_ARABIANAVESTANINSCRIPTIONAL_PARTHIANINSCRIPTIONAL_PAHLAVI
OLD_TURKICRUMI_NUMERAL_SYMBOLSBRAHMIKAITHI	CUNEIFORM!CUNEIFORM_NUMBERS_AND_PUNCTUATIONEGYPTIAN_HIEROGLYPHSBAMUM_SUPPLEMENTKANA_SUPPLEMENTBYZANTINE_MUSICAL_SYMBOLSMUSICAL_SYMBOLSANCIENT_GREEK_MUSICAL_NOTATIONTAI_XUAN_JING_SYMBOLSCOUNTING_ROD_NUMERALS!MATHEMATICAL_ALPHANUMERIC_SYMBOLSMAHJONG_TILESDOMINO_TILESPLAYING_CARDS ENCLOSED_ALPHANUMERIC_SUPPLEMENTENCLOSED_IDEOGRAPHIC_SUPPLEMENT%MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	EMOTICONSTRANSPORT_AND_MAP_SYMBOLSALCHEMICAL_SYMBOLS"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D'CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENTTAGSVARIATION_SELECTORS_SUPPLEMENT SUPPLEMENTARY_PRIVATE_USE_AREA_A SUPPLEMENTARY_PRIVATE_USE_AREA_Br   r_   r4   r`   r5   r3   r{   r{      s   4GK ffg6K$VVW="667;"667; 9N*667C"-ffg"F"667;6673H%ffg>6673H1F1F1F#FFG<1F
ffg
.CFFG4I&&'2GVVW5J&&'2G6673H6673H0E0E1F&&'2GFFG4I&&'2Gvvw/D
ffg
.C&&'2G&&'2G6673Hffg6K6673H%ffg>6673H,7,P)0E0E&&'2G&&'2G0E6673H0EFFG4I5@QX5Y20E1Fffg6K8M6673H6673H6673HFFG4I0E1F6673H"667;%ffg>%0%I"-8-Q* +FFG D 9N%ffg>"-ffg"F"667;.9&&'.R+$VVW=vvw7L1F(A)&&'B"667;$/$H!(Affg6K 9N"667;'@6673H+6vvw+O('@"667;'@+6vvw+O(*5ffg*N''2667'K$VVW5J"667;1F%ffg>6673H#FFG<%ffg>*667C)&&'B!&&':O)4VVW)M&"-ffg"F6673H6673H6673H +FFG D1F#FFG<ffg6K#.vvw#G &1&&'&J##FFG<)4VVW)M&)&&'B(Avvw7Lffg6Kvvw/D
ffg
.C%ffg>0E'@"667;vvw7L +FFG D6673HVVW5J%ffg>6673H1F(A6673Hvvw/D$VVW=6673H%ffg>vvw7L"667;(A!&&':O"-ffg"F 9N"667;#.vvw#G $/$H!"-ffg"F%ffg> 9N&vvw?)&&'B%ffg>"-ffg"F$/$H!6673H$Wgw?$Wgw? '7;N''B!'7G<O':M'73F'73FWgw7J'73F7GW5Hgw8K'7G4G'7G4G'7G4G#GWg>"7GW=Wgw7J'73FWgw7J#GWg>'7G4G('7C''BWgw7J&wA'73F'73FGWg6I(3GWg(N%&wA"7GW=!'7G<O +GWg F!'7G<O%0'7%K"''B''B(3GWg(N%':Mw9L':M'27GW'M$&1'7G&L#,7',R)GWg6I +GWg F$Wgw?)4Wgw)O&)4Wgw)O&)4Wgw)O&.9'7G.T+w1D%0'7%K"'27GW'M$'28Xw'O$"b!G! !r5   r{   c                 r    t        |       } |t        j                  | |      S t        j                  |       S )zReturns the digit value assigned to the Unicode character uni_char as
    integer. If no such value is defined, default is returned, or, if not
    given, ValueError is raised.)r   r/   digit)r2   default_values     r3   r_  r_    s8     hH   =99  **r5   __main__rf   )r9   r   r/   utilr   r   r   objectr   rb   rd   r{   r_  r6   doctesttestmodr`   r5   r3   <module>rf     sl   IT   ) ).6v .6b/D, D.k!F k!\+ zGOO r5   