
    !}g3                     h   d Z dZddlZddlZddlZddlmZ ddlZddlZddl	Z	ddl
mZ ddlZddlZddlmZ ddlZddlmZ d	a ej                    adai ad
ad
ai ada ej        ej                  Z ej        e           a!tB          "                    ej#                   tB          $                    e           d Z%d Z&d$dZ'd Z(d Z)d Z*e(d             Z+d Z,d Z-d%da.d&da/e(d             Z0e(d$d            Z1t\          Z2t^          Z3d Z4d Z5d Z6d Z7e(d$d            Z8d Z9d  Z:d! Z;d'd#Z<dS )(z0.34MIT    N   )finalseg)logwraps)md5dict.txt        Fc                 :    t                               |            d S N)loggersetLevel)	log_levels    N/var/www/py-google-trends/myenv/lib/python3.11/site-packages/jieba/__init__.pysetLogLevelr       s    
OOI    c           
         i }t                      }d}t          | d          5 }d}|                                                                                    d                              d          D ]}|dz  }	 |                    d          d d         \  }}t          |          }|||<   ||z  }t          t          |                    D ]"}	|	                    |d |	dz                       ## t          $ r)}
t                              | d	|d|           |
d }
~
ww xY w	 d d d            n# 1 swxY w Y   |||fS )
Nr   rbr   utf-8
r       z	 at line )setopenreadrstripdecodesplitfloatrangelenadd
ValueErrorr   debug)f_namelfreqpfdictltotalflinenolinewordfreqches              r   
gen_pfdictr1   $   s   EUUFF	fd		 qFFHHOO%%,,W55;;DAA 	 	DaKF	 JJsOOBQB/	TT{{"d$D		** , ,BJJtERTE{++++,   66666644HIII	               5&  s7   AD6:A8C32D63
D&=$D!!D&&D66D:=D:c                    | st           } t          5  t          r	 d d d            d S t          rbd at          j                            t          j                            t	          j                    t          j        	                    t                                        }t          j                            ||           }t                              d|z             t          j                    }|t          j                            |d          k    r2t          j                            t          j                    d          }ngt          j                            t          j                    dt!          |                    dd                                                    z            }d}t          j                            |          rt          j                            |          t          j                            |          k    rt                              d|z             	 t+          |d	          5 }t-          j        |          \  aaaad d d            n# 1 swxY w Y   t7          t          t8                     }n	#  d}Y nxY w|rHt;          |          \  aaat=          d
 t0                                          D                       atA          t0          !                                          at                              d|z             	 t          j"                    \  }}t	          j#        |d          5 }	t-          j$        t          t0          t2          t4          f|	           d d d            n# 1 swxY w Y   t          j%        dk    rddl&m'}
 nt          j(        }
 |
||           n!#  t          )                    d           Y nxY wdat                              dt          j                    |z
  z             t                              d           d d d            d S # 1 swxY w Y   d S )Nz Building prefix dict from %s ...r
   zjieba.cachezjieba.u%s.cacher   replaceTzLoading model from cache %sr   c              3   h   K   | ]-\  }}|t          t          |          t          z            fV  .d S r   )r   r    total).0kvs      r   	<genexpr>zinitialize.<locals>.<genexpr>Y   s=      HHCAa3uQxx~../HHHHHHr   zDumping model to file cache %swbntr   )movezDump cache file failed.zLoading model cost %s seconds.z'Prefix dict has been built succesfully.)*
DICTIONARY	DICT_LOCKinitializedr(   ospathnormpathjoingetcwddirname__file__r   r%   timetempfile
gettempdirr	   encode	hexdigestexistsgetmtimer   marshalloadFREQr5   min_freq
isinstancer   r1   dictitemsminvaluesmkstempfdopendumpnameshutilr<   rename	exception)
dictionary_curpathabs_patht1
cache_fileload_from_cache_failcffdfpathtemp_cache_filereplace_files              r   
initializeri   8   s     
	 /@ /@ 	/@ /@ /@ /@ /@ /@ /@ /@  	F7##BGLLbgooh>W>W$X$XYY7<<*557(BCCCY[[rw||Hj9999h&9&;&;]KKJJh&9&;&;=NQTU]UdUdelnwUxUxQyQy  RD  RD  RF  RF  >F  G  GJ#7>>*%% 	,"'*:*::*F*FIYIYZbIcIc*c*cLL6CDDD,*d++ Br18b1A1A.F4hB B B B B B B B B B B B B B B ,6fc+B+B'B$$,'+$$$ 	< *8 4 4F4HH4::<<HHHHHD4;;==))HLL9JFGGG
<$,..	EYr4(( POL&eH!=OOOP P P P P P P P P P P P P P P7d??;;;;;;;#%9LUJ////<  !:;;;;;5r9IJKKK>???_/@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@ /@s   	P8HP8-J
=I#J
#I'	'J
*I'	+J
	P8
JBP8+N=.N6N=N	N=	N	
2N=<P8=OAP88P<?P<c                 <     t                      fd            }|S )Nc                  Z    t           r | i |S t          t                      | i |S r   )r?   ri   r=   )argskwargsfns     r   wrappedz$require_initialized.<locals>.wrappedp   sE      	'2t&v&&&z"""2t&v&&&r   r   )rn   ro   s   ` r   require_initializedrp   n   s3    
2YY' ' ' ' Y' Nr   c              #     K   t          |           }d}|                                D ]X\  }}t          |          dk    r$||k    r| ||d         dz            V  |d         }<|D ]}||k    r| ||dz            V  |}Yd S )Nr   r   )get_DAGrT   r"   )sentencedagold_jr7   Ljs         r   	__cut_allry   |   s      
(

CEyy{{  !q66Q;;1u991QqT!V8$$$$aDEE  q55"1QqS5/)))E r   c                      t                     }d|<   t          |dz
  dd          D ](t           fd|         D                       <   )d S )N)r    r   rr   c              3      K   | ]C}t                               |d z            t                    |d z            d         z   |fV  DdS )r   r   N)rP   getrQ   )r6   xidxroutert   s     r   r9   zcalc.<locals>.<genexpr>   sV      eeWX$((8C!G#4X>>qsANPQReeeeeer   )r"   r!   max)rt   DAGr   r   Ns   ` `` r   calcr      s{    HAE!HQqS"b!! f feeeeee\_`c\deeeeec

f fr   c                 8   i }t          |           }t          |          D ]x}g }|}| |         }||k     rH|t          v r?|t          v r|                    |           |dz  }| ||dz            }||k     r	|t          v ?|s|                    |           |||<   y|S Nr   )r"   r!   r(   rP   append)rt   r   r   r7   tmplistifrags          r   rs   rs      s     CHA1XX  {!eet||q!!!FAAacE?D	 !ee
  	NN1AJr   c              #     K   t          j        dt           j                  }t          |           }i }t	          | |d|           d}t          |           }d}||k     r_||         d         dz   }| ||         }|                    |          rt          |          dk    r||z  }|}n|r|V  d}|V  |}||k     _|r|V  d}d S d S )Nz[a-zA-Z0-9]r   r{   r   )recompileUrs   r   r"   match)	rt   re_engr   r   r~   r   bufyl_words	            r   __cut_DAG_NO_HMMr      s     Zrt,,F
(

CE35!!!	AHA
C
a%%!HQK!O!A#<< 	CKK1$4$46MCAA 			LLLA a%%  			 r   c              #   (  K   t          |           }i }t          | |d|           d}d}t          |           }||k     r||         d         dz   }| ||         }||z
  dk    r||z  }nR|rLt          |          dk    r|V  d}n2|t          vrt	          j        |          }|D ]}	|	V  n	|D ]}
|
V  d}|V  |}||k     |rJt          |          dk    r|V  d S |t          vrt	          j        |          }|D ]}	|	V  d S |D ]}
|
V  d S d S )Nr   )r   r{   r   )rs   r   r"   rP   r   cut)rt   r   r   r~   r   r   r   r   
recognizedtelems              r   	__cut_DAGr      s     
(

CE3''''	A
CHA
a%%!HQKM!A#Q3!886MCC s88q==IIICC4%-\#%6%6
!+ $ $A"#GGGG$ %( ' 'D"&JJJJCLLL) a%%,  	s88q==IIIIIoo!c**J      



	 	 r   Tc              #     K   t          | t                    r=	 |                     d          } n&# t          $ r |                     dd          } Y nw xY w|r?t	          j        dt          j                  t	          j        dt          j                  }}n>t	          j        dt          j                  t	          j        dt          j                  }}|                    |           }|rt          }n|rt          }nt          }|D ]q}|s|                    |          r ||          D ]}|V  -|                    |          }	|	D ],}
|                    |
          r|
V  |s
|
D ]}|V  (|
V  -rdS )	a7  The main function that segments an entire sentence that contains
    Chinese characters into seperated words.
    Parameter:
        - sentence: The str to be segmented.
        - cut_all: Model type. True for full pattern, False for accurate pattern.
        - HMM: Whether to use the Hidden Markov Model.
    r   gbkignoreu   ([一-龥]+)z[^a-zA-Z0-9+#
]u   ([一-龥a-zA-Z0-9+#&\._]+)z(
|\s)N)rR   bytesr   UnicodeDecodeErrorr   r   r   r   ry   r   r   r   )rt   cut_allHMMre_hanre_skipblocks	cut_blockblkr-   tmpr~   xxs               r   r   r      s      (E"" 8	8w//HH! 	8 	8 	8uh77HHH	8  o*%924@@"*M_acaeBfBf*%H"$OOQSQ[\gikimQnQn\\(##F %			 %		$	   	<< 	!	#  



 --$$C  ==## GGGG   ! ! ! GGGG s   /  AAc              #   z  K   t          | |          }|D ]}t          |          dk    r<t          t          |          dz
            D ]}|||dz            }|t          v r|V  t          |          dk    r<t          t          |          dz
            D ]}|||dz            }|t          v r|V  |V  d S )Nr   r   r      )r   r"   r!   rP   )rt   r   wordswr   gram2gram3s          r   cut_for_searchr     s      c"""E  q66A::3q66!8__    !AaC%D==KKKq66A::3q66!8__    !AaC%D==KKK r   c                    t          | t                    rt          | d          } |                                                     d          }d}|                    d          D ]}}|dz  }|                                s|                    d          }|d         |d         }}|                                du rX|dk    r|                    dd	          }t          |  ~d
S )z Load personalized dict to improve detect rate.
    Parameter:
        - f : A plain text file contains words and their ocurrences.
    Structure of dict file:
    word1 freq1 word_type1
    word2 freq2 word_type2
    ...
    Word type may be ignored
    r   r   r   r   r   r   Fu   ﻿r{   N)
rR   strr   r   r   r   r   isdigitr3   add_word)r*   contentline_nor,   tupr-   r.   s          r   load_userdictr     s     !S DMMffhhoog&&GGd## 
 
1{{}} 	jjooVSVd<<>>U""a<<<<,,D#
 
r   c                 "   t          t          |          t          z            t          | <   ||                                t
          | <   t          t          |                     D ]'}t          	                    | d |dz                       (d S r   )
r   r    r5   rP   stripuser_word_tag_tabr!   r"   r(   r#   )r-   r.   tagr/   s       r   r   r   :  s     U4[[5())DJ
"%))++$CII    

4A;   r   c                 <    t          t          | d                    S NFlist	__ref_cutrt   s    r   __lcutr   F  s    	(E**+++r   c                 >    t          t          | dd                    S r   r   r   s    r   __lcut_no_hmmr   H  s    	(E511222r   c                 <    t          t          | d                    S )NTr   r   s    r   
__lcut_allr   J  s    	(D))***r   c                 :    t          t          |                     S r   )r   __ref_cut_for_searchr   s    r   __lcut_for_searchr   L  s    $X..///r   c                     t           j        dk    rt          d          ddlm}m} | 
 |            }  ||           ad	d}d }|a|ad S )
Nr;   z/jieba: parallel mode only supports posix systemr   )Pool	cpu_countFTc              3   @  K   t          j        d                              |           }|r!t                              t
          |          }nC|r!t                              t          |          }n t                              t          |          }|D ]}|D ]}|V  d S Nz([
]+))r   r   r   poolmapr   r   r   )rt   r   r   partsresultrr   s          r   pcutzenable_parallel.<locals>.pcutZ  s      
;''--h77 	4XXj%00FF 	4XXfe,,FFXXmU33F 	 	A  	 	r   c              3      K   t          j        d                              |           }t                              t
          |          }|D ]}|D ]}|V  d S r   )r   r   r   r   r   r   )rt   r   r   r   r   s        r   pcut_for_searchz(enable_parallel.<locals>.pcut_for_searchf  so      
;''--h77+U33 	 	A  	 	r   FT)	r@   rZ   	Exceptionmultiprocessingr   r   r   r   r   )
processnumr   r   r   r   s        r   enable_parallelr   P  s     
w$IJJJ////////Y[[
4
D
 
 
 
   C$NNNr   c                  x    dt                      v rt                                           d at          at
          ad S )Nr   )globalsr   closer   r   r   r    r   r   disable_parallelr   p  s1    


C)NNNr   c                 L   t           5  t          j                            t          j                            t          j                    |                     }t          j                            |          st          d|z             |ada	d d d            d S # 1 swxY w Y   d S )Nzjieba: path does not exist: F)
r>   r@   rA   rB   rC   rD   rL   r   r=   r?   )dictionary_pathr`   s     r   set_dictionaryr   x  s    	  7##BGLLo$N$NOOw~~h'' 	G:XEFFF
                 s   BBB Bc                  0   t           j                            t           j                            t          j                    t           j                            t                                        } t           j                            | t                    }|S r   )r@   rA   rB   rC   rD   rE   rF   r=   )r_   r`   s     r   get_abs_path_dictr     sU    wRY[["'//(:S:S T TUUHw||HZ00HOr   defaultc              #     K   t          | t                    st          d          d}|dk    r4t          | |          D ] }t	          |          }||||z   fV  ||z  }!dS t          | |          D ]}t	          |          }t	          |          dk    rHt          t	          |          dz
            D ](}|||dz            }|t          v r|||z   ||z   dz   fV  )t	          |          dk    rHt          t	          |          dz
            D ](}|||dz            }|t          v r|||z   ||z   dz   fV  )||||z   fV  ||z  }dS )	zTokenize a sentence and yields tuples of (word, start, end)
    Parameter:
        - sentence: the str to be segmented.
        - mode: "default" or "search", "search" is for finer segmentation.
        - HMM: whether to use the Hidden Markov Model.
    z)jieba: the input parameter should be str.r   r   r   r   r   r   N)rR   r   r   r   r"   r!   rP   )	unicode_sentencemoder   startr   widthr   r   r   s	            r   tokenizer     s      &,, ECDDDEy%3/// 	 	AFFEeU5[))))UNEE	 	
 %3/// 	 	AFFE1vvzzs1vvax : :Aa!eHE}}$eAguQwqy99991vvzzs1vvax : :Aa!eHE}}$eAguQwqy9999eU5[))))UNEE	 	r   r   r   )T)r   T)=__version____license__r   r@   sysr{   r   rG   rH   rN   mathr   random	threading	functoolsr   logginghashlibr	   r=   RLockr>   r(   rP   rQ   r5   r   r?   StreamHandlerstderrlog_console	getLogger__name__r   r   DEBUG
addHandlerr   r1   ri   rp   ry   r   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s   				 				 



                                 
IO			 #g#CJ//		8	$	$       +     ! ! !(3@ 3@ 3@ 3@l    f f f   $  0& & &P+ + + +Z      4         	% , , ,3 3 3+ + +0 0 0 % % % %>* * *    
     r   