
    iO                         d dl mZmZ d dlZd dlZd dlmZmZ d dlZd dl	m
Z
mZmZ d dlmZ  ej                  e      Z G d de      Z G d d	      Z G d
 de      Z G d de      Zy)    )ABCabstractmethodN)ListOptional)retrystop_after_attemptwait_random_exponential)EmbeddingConfigc                   T    e Zd ZdefdZed        ZdedefdZd
dede	de
e   fdZy	)EmbeddingBaseembedding_configc                     || _         y N)config)selfr   s     P/Users/bowang/.openclaw/workspace/ChatDev/runtime/node/agent/memory/embedding.py__init__zEmbeddingBase.__init__   s	    &    c                      y r    r   texts     r   get_embeddingzEmbeddingBase.get_embedding   s    r   r   returnc                     |syt        j                  dd|j                               }t        j                  dd|      }t        j                  dd|j                               }|S )z-Preprocess text to improve embedding quality. z\s+ z[^\w\s\u4e00-\u9fff])resubstripr   s     r   _preprocess_textzEmbeddingBase._preprocess_text   sV     vvfc4::<0 vv-sD9 vvfc4::<0r   
max_lengthc                 R   t        |      |k  r|gS t        j                  d|      }g }d}|D ]U  }|j                         }|st        ||z         |k  r	||dz   z  }0|r|j	                  |j                                |dz   }W |r|j	                  |j                                |S )z9Split long text into chunks to improve embedding quality.z[\u3002\uff01\uff1f\uff1b\n]r   u   。)lenr   splitr    append)r   r   r"   	sentenceschunkscurrent_chunksentences          r   _chunk_textzEmbeddingBase._chunk_text*   s    t9
"6M HH<dC	!H~~'H=8+,
:H!44 MM-"5"5"78 (8 3 " MM---/0r   N)i  )__name__
__module____qualname__r
   r   r   r   strr!   intr   r+   r   r   r   r   r      sS    ' '  S S    tCy r   r   c                   &    e Zd Zededefd       Zy)EmbeddingFactoryr   r   c                 v    | j                   }|dk(  rt        |       S |dk(  rt        |       S t        d|       )NopenailocalzUnsupported embedding model: )providerOpenAIEmbeddingLocalEmbedding
ValueError)r   models     r   create_embeddingz!EmbeddingFactory.create_embeddingF   sH     ))H"#344g!"233<UGDEEr   N)r,   r-   r.   staticmethodr
   r   r;   r   r   r   r2   r2   E   s&    F? F} F Fr   r2   c                   v     e Zd Zdef fdZ e edd       ed            d        Zd	e	d
e
e   fdZ xZS )r7   r   c                 *   t         |   |       |j                  | _        |j                  | _        |j                  xs d| _        |j                  j                  dd      | _        |j                  j                  dd      | _	        |j                  j                  dd      | _
        d| _        | j                  r1t        j                  | j                  | j                  	      | _        y t        j                  | j                  
      | _        y )Nztext-embedding-3-smallr"   i  use_chunkingFchunk_strategyaveragei   )api_keybase_url)rB   )superr   rC   rB   r:   
model_nameparamsgetr"   r?   r@   _fallback_dimr4   OpenAIclient)r   r   	__class__s     r   r   zOpenAIEmbedding.__init__Q   s    )*(11'//*00L4L*1155lDI,3377N.5599:JIV!== --t}}UDK --=DKr         )minmax
   )waitstopc                 2   | j                  |      }|s%t        j                  d       dg| j                  z  S | j                  r)t        |      | j                  kD  r| j                  |      S |d | j                   }	 | j                  j                  j                  || j                  d      }|j                  d   j                  }t        |      | _        |S # t        $ r2}t        j                  d|        dg| j                  z  cY d }~S d }~ww xY w)NzEmpty text after preprocessing        floatinputr:   encoding_formatr   zError getting embedding: )r!   loggerwarningrH   r?   r$   r"   _get_chunked_embeddingrJ   
embeddingscreaterE   data	embedding	Exceptionerror)r   r   processed_texttruncated_textresponser_   es          r   r   zOpenAIEmbedding.get_embedding`   s    ..t4NN;<54---- ^!4t!F..~>> ((89	.{{--44$oo ' 5 H
 !a(22I!$YD 	.LL4QC8954----	.s   >AC 	D$'DDDr   r   c           	        	 | j                  || j                  dz        }|sdg| j                  z  S g }|D ]]  }	 | j                  j                  j                  || j                  d      }|j                  |j                  d   j                         _ |sdg| j                  z  S | j                  dk(  rDt        t        |d               D cg c]"  t!        fd	|D              t        |      z  $ c}S | j                  d
k(  rxt        t        |            D cg c]
  }d|dz   z   c}	t!        	      }t        t        |d               D cg c]#  t!        	fdt#        |      D              |z  % c}S |d   S # t        $ r#}t        j                  d|        Y d}~xd}~ww xY wc c}w c c}w c c}w )z2Chunk long text, embed each chunk, then aggregate.rL   rT   rU   rV   r   zError getting chunk embedding: NrA   c              3   (   K   | ]	  }|     y wr   r   ).0chunkis     r   	<genexpr>z9OpenAIEmbedding._get_chunked_embedding.<locals>.<genexpr>   s     ?.>Ua.>s   weightedg      ?   c              3   :   K   | ]  \  }}|   |   z    y wr   r   )rh   jri   rj   weightss      r   rk   z9OpenAIEmbedding._get_chunked_embedding.<locals>.<genexpr>   s%     Z>Y(!Ua71:->Ys   )r+   r"   rH   rJ   r\   r]   rE   r&   r^   r_   r`   rY   rZ   r@   ranger$   sum	enumerate)
r   r   r(   chunk_embeddingsri   rd   re   rj   total_weightrp   s
          ` @r   r[   z&OpenAIEmbedding._get_chunked_embedding}   s   !!$1(<=54----E	;;1188//$+ 9 
 !''a(8(B(BC   54---- )+ "#&6q&9":;=;q ?.>??#FVBWW;= =  J..3C8H4I.JK.Jsa!e}.JKGw<L!#&6q&9":;=;q ZiHX>YZZ]ii;= = $A&&)  !@DE= L=s*   AF	'F8"F=(G		F5F00F5)r,   r-   r.   r
   r   r   r	   r   r   r/   r   rU   r[   __classcell__rK   s   @r   r7   r7   P   sP    > > 'A15<Nr<RS. T.8$'3 $'4; $'r   r7   c                   *     e Zd Zdef fdZd Z xZS )r8   r   c                 f   t         |   |       |j                  j                  d      | _        |j                  j                  dd      | _        d| _        | j                  st        d      	 ddlm	}  || j                  | j
                        | _
        y # t        $ r t        d	      w xY w)
N
model_pathdevicecpui   z,LocalEmbedding requires model_path parameterr   )SentenceTransformer)r{   z4sentence-transformers is required for LocalEmbedding)rD   r   rF   rG   rz   r{   rH   r9   sentence_transformersr}   r:   ImportError)r   r   r}   rK   s      r   r   zLocalEmbedding.__init__   s    )**1155lC&--11(EB KLL	VA,T__T[[QDJ 	VTUU	Vs   1)B B0c                 D   | j                  |      }|sdg| j                  z  S 	 | j                  j                  |d      }|j	                         }t        |      | _        |S # t        $ r2}t        j                  d|        dg| j                  z  cY d }~S d }~ww xY w)NrT   F)convert_to_tensorzError getting local embedding: )	r!   rH   r:   encodetolistr$   r`   rY   ra   )r   r   rb   r_   resultre   s         r   r   zLocalEmbedding.get_embedding   s    ..t454----	.

)).E)RI%%'F!$VDM 	.LL:1#>?54----	.s   >A$ $	B-'BBB)r,   r-   r.   r
   r   r   rv   rw   s   @r   r8   r8      s    V V .r   r8   )abcr   r   r   loggingtypingr   r   r4   tenacityr   r   r	   entity.configsr
   	getLoggerr,   rY   r   r2   r7   r8   r   r   r   <module>r      si    # 	  !   +			8	$1C 1f	F 	FQ'm Q'f.] .r   