
    i?                         d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlZddlZddlZddlmZmZmZmZ ddlmZmZ ddlmZ  ej2                  e      Z G d d	e      Zy)
zH
FileMemory: Memory system for vectorizing and retrieving file contents
    N)Path)ListDictAny)
MemoryBaseMemoryContentSnapshot
MemoryItemMemoryWritePayload)MemoryStoreConfigFileSourceConfig)FileMemoryConfigc                   @    e Zd ZdZdef fdZd!dZd!dZded	e	d
e
dedee   f
dZdeddfdZd!dZd!dZdefdZdedee   fdZdedee   defdZd"dededee   fdZdedee   fdZdee   dee   fdZdedefdZd"dededdfdZdee   ddfd Z xZ S )#
FileMemoryz
    File-based memory system that indexes and retrieves content from files/directories.
    Supports multiple file types, chunking strategies, and incremental updates.
    storec                 &   |j                  t              }|st        d      t        |   |       |j
                  st        d      || _        |j
                  | _        | j                  j                  | _        d| _        d| _	        i | _
        y )Nz5FileMemory requires a file memory store configurationz=FileMemory requires at least one file_source in configurationi  2   )	as_configr   
ValueErrorsuper__init__file_sourcesfile_config
index_path
chunk_sizechunk_overlapfile_metadata)selfr   config	__class__s      R/Users/bowang/.openclaw/workspace/ChatDev/runtime/node/agent/memory/file_memory.pyr   zFileMemory.__init__!   s    !12TUU""\]]!4:4G4G**55  9;    returnNc                    | j                   rt        j                  j                  | j                         rit        j                  d| j                           | j                          | j                         r&t        j                  d       | j                          yyt        j                  d       | j                          | j                   r| j                          yy)z
        Load existing index or build new one from file sources.
        Validates index integrity and performs incremental updates if needed.
        zLoading existing index from z!Index updated due to file changesz$Building new index from file sourcesN)
r   ospathexistsloggerinfo_load_from_file_validate_and_update_indexsave_build_index_from_sources)r   s    r    loadzFileMemory.load5   s    
 ??rww~~doo>KK6t6GHI  " ..0?@		 1 KK>?**,		 r!   c                 f   | j                   st        j                  d       yt        j                  t        j
                  j                  | j                         d       | j                  | j                  D cg c]  }|j                          c}| j                  | j                  dd}t        | j                   dd	      5 }t        j                  ||d
d       ddd       t        j                  d| j                    dt!        | j                         d       yc c}w # 1 sw Y   HxY w)z Persist the memory index to diskz&No index_path specified, skipping saveNT)exist_ok)r   r   )r   contentsr   wutf-8encoding   F)indentensure_asciizIndex saved to z (z chunks))r   r'   warningr$   makedirsr%   dirnamer   r0   to_dictr   r   openjsondumpr(   len)r   itemdatafs       r    r+   zFileMemory.saveH   s    NNCD 	BGGOODOO4tD "//48MMBMDMB"oo!%!3!3
 $//39QIIdAae< : 	odoo%6bT]]9K8LHUV C :9s   :D"D''D0
agent_rolequerytop_ksimilarity_thresholdc           	      ,   | j                         dk(  rg S | j                  j                  |j                        }t	        |t
              r%t        j                  |t        j                        }|j                  dd      }t        j                  |       |j                  d   }g }g }| j                  D ]  }	|	j                  t        |	j                        |k7  r6t        j!                  d|	j"                  t        |	j                        |       ^|j%                  |	j                         |j%                  |	        |sg S t        j                  |t        j                        }t        j&                  |j                  d         }
|
j)                  |       |
j+                  |t-        |t        |                  \  }}g }t/        t        |d               D ]2  }|d   |   }|d   |   }|dk7  s||k\  s|j%                  ||          4 |S )aV  
        Retrieve relevant file chunks based on query.

        Args:
            agent_role: Agent role (not used in file memory)
            inputs: Query text
            top_k: Number of results to return
            similarity_threshold: Minimum similarity score

        Returns:
            List of MemoryItem with file chunks
        r   dtype   z8Skipping memory item %s: embedding dim %d != expected %d)count_memories	embeddingget_embeddingtext
isinstancelistnparrayfloat32reshapefaissnormalize_L2shaper0   r?   r'   r8   idappendIndexFlatIPaddsearchminrange)r   rC   rD   rE   rF   query_embeddingexpected_dimmemory_embeddingsvalid_itemsr@   indexsimilaritiesindicesresultsiidx
similaritys                    r    retrievezFileMemory.retrievea   s   &  A%I ..66uzzBot, hhbjjIO)11!R8?+&,,Q/ MMD~~)t~~&,6NNRT^^!4l !((8""4( " !IHH%6bjjI !!"3"9"9!"<=		#$ %_c%[IY>Z [g s71:'A!*Q-C%a+JbyZ+??{3/0 ( r!   payloadc                 .    t         j                  d       y)z
        FileMemory is read-only, updates are not supported.
        This method is a no-op to maintain interface compatibility.
        z6FileMemory.update() called but FileMemory is read-onlyN)r'   debug)r   rl   s     r    updatezFileMemory.update   s    
 	MNr!   c                    	 t        | j                  dd      5 }t        j                  |      }ddd       j	                  di       | _        |j	                  dg       }g }|D ]'  }	 |j                  t        j                  |             ) || _
        |j	                  di       }|j	                  d| j                        | _        |j	                  d	| j                        | _        t        j                  d
t        | j                         d       y# 1 sw Y   xY w# t        $ r Y w xY w# t        $ r0}t        j!                  d|        i | _        g | _
        Y d}~yd}~ww xY w)zLoad index from JSON filerr2   r3   Nr   r0   r   r   r   zLoaded z chunks from indexzError loading index: )r<   r   r=   r-   getr   rZ   r	   	from_dict	Exceptionr0   r   r   r'   r(   r?   error)r   rB   rA   raw_contentsr0   rawr   es           r    r)   zFileMemory._load_from_file   s7   	doosW=yy| > "&/2!>D88J3L)+H#OOJ$8$8$=> $
 %DM XXh+F$jjtGDO!'OT=O=O!PDKK'#dmm"4!55GHI% >= !   	LL045!#DDMM	sR   D2 D8D2 )$D#B	D2 D D2 #	D/,D2 .D//D2 2	E+;&E&&E+c                    g }| j                   D ]  }t        j                  d|j                          | j	                  |      }t        j                  dt        |       d|j                          |D ]/  }| j                  ||j                        }|j                  |       1  t        j                  dt        |              | j                  |      | _
        t        j                  dt        | j                         d       y)z(Build index by scanning all file sourceszScanning source: zFound z
 files in zTotal chunks to index: zIndex built with z chunksN)r   r'   r(   source_path_scan_filesr?   _read_and_chunk_filer4   extend_build_embeddingsr0   )r   
all_chunkssourcefiles	file_pathchunkss         r    r,   z$FileMemory._build_index_from_sources   s    
''FKK+F,>,>+?@A$$V,EKK&UJv7I7I6JKL"	229fooN!!&) # ( 	-c*o->?@ ..z:'DMM(:';7CDr!   c                    d}t               }| j                  D ]$  }| j                  |      }|j                  |       & t        | j                  j                               }||z
  }|r5t        j                  dt        |       d       | j                  |       d}| j                  D ]  }| j                  |      }|D ]  }| j                  |      }|| j                  vr7t        j                  d|        | j                  ||j                         d}Y| j                  |   j                  d      |k7  s{t        j                  d|        | j                  |g       | j                  ||j                         d}  |S )z
        Validate index integrity and update if files changed.

        Returns:
            True if index was updated, False otherwise
        Fz	Removing z deleted files from indexTzIndexing new file: hashzRe-indexing modified file: )setr   r{   ro   r   keysr'   r(   r?   _remove_files_from_index_compute_file_hash_index_filer4   rr   )	r   updatedcurrent_filesr   r   indexed_filesdeleted_filesr   	file_hashs	            r    r*   z%FileMemory._validate_and_update_index   st     ''F$$V,E  ' (
 D..3356%5KK)C$6#77PQR))-8G ''F$$V,E"	 33I>	 D$6$66KK"5i[ AB$$Y@"G ''	266v>)KKK"=i[ IJ119+>$$Y@"G # (& r!   r   c                    t        |j                        j                         j                         }|j	                         r*| j                  ||j                        rt        |      gS g S |j                         s$t        j                  d|j                          g S g }|j                  r`|j                  d      D ]J  }|j	                         s| j                  ||j                        s1|j                  t        |             L |S |j                  d      D ]J  }|j	                         s| j                  ||j                        s1|j                  t        |             L |S )z
        Scan file path and return list of matching files.

        Args:
            source: FileSourceConfig with path and filters

        Returns:
            List of absolute file paths
        zPath does not exist: *)r   rz   
expanduserresolveis_file_matches_file_types
file_typesstris_dirr'   r8   	recursiverglobrZ   glob)r   r   r%   r   r   s        r    r{   zFileMemory._scan_files  s'    F&&'224<<> <<>''f.?.?@D	{"I {{}NN263E3E2FGHI!ZZ_	$$&4+C+CIvO`O`+aLLY0 - 	 "YYs^	$$&4+C+CIvO`O`+aLLY0 , r!   r   r   c                 $    |y|j                   |v S )z*Check if file matches the file type filterT)suffix)r   r   r   s      r    r   zFileMemory._matches_file_types1  s    :--r!   r4   c                    	 t        |d|d      5 }|j                         }ddd       j                         sg S | j                  |      }t        j                  j                  |      }| j                  |      }g }	t        |      D ]H  \  }
}|	j                  |d|t        j                  j                  |      |||
t        |      |dd	       J ||t        |      t        j                         d
| j                   |<   |	S # 1 sw Y   xY w# t        $ r'}t        j	                  d| d|        g cY d}~S d}~ww xY w)z
        Read file and split into chunks.

        Args:
            file_path: Path to file
            encoding: File encoding

        Returns:
            List of chunk dictionaries with content and metadata
        rq   ignore)r4   errorsNzError reading file : file)source_typer   	file_namer   	file_sizechunk_indextotal_chunksr4   )contentmetadata)r   sizechunks_count
indexed_at)r<   readrt   r'   ru   stripr   r$   r%   getsize_chunk_text	enumeraterZ   basenamer?   timer   )r   r   r4   rB   r   rx   r   r   r   chunk_dictsrh   
chunk_texts               r    r|   zFileMemory._read_and_chunk_file7  s9   	ixIQ&&( J }}I ++I6	GGOOI.	 !!'* &v.MAz%#)!*!#!1!1)!<!*!*#$$'K (	   /" K))+	)
9% S JI 	LL.ykA3?@I	s3   D DD DD 	ED=7E=ErO   c                    t        |      | j                  k  r|gS g }d}|t        |      k  r|| j                  z   }||| }|t        |      k  rt        |j                  d      |j                  d      |j                  d      |j                  d      |j                  d      |j                  d      |j                  d            }|| j                  d	z  kD  r|d
|dz    }||z   dz   }|j	                  |j                                || j                  z
  }|t        |      k\  rn|t        |      k  r|D cg c]  }|s|	 c}S c c}w )z
        Split text into chunks with overlap.

        Args:
            text: Input text

        Returns:
            List of text chunks
        r   u   。u   ！u   ？.!?
r5   NrJ   )r?   r   maxrfindrZ   r   r   )r   rO   r   startendchunklast_sentencecs           r    r   zFileMemory._chunk_textn  sF    t9'6Mc$i$//)CsOE SY #KK&KK&KK&KK$KK$KK$KK%! !4??a#77!"4=1#45E-/!3CMM%++-( $,,,ED	!7 c$i: "'6aQ6'''s   >EEr   c           	         g }|D ]  }|d   }|d   }	 | j                   j                  |      }t        |t              r5t	        j
                  |t        j                        j                  dd      }t        j                  |       |j                         d   }|d	    d
|d    }	t        |	|||t        j                               }
|j!                  |
        |S # t        $ r#}t        j                  d|        Y d}~d}~ww xY w)z
        Generate embeddings for chunks and create MemoryItems.

        Args:
            chunks: List of chunk dictionaries

        Returns:
            List of MemoryItem objects
        r   r   rH   rJ   rK   r   z&Error generating embedding for chunk: Nr   _r   )rY   content_summaryr   rM   	timestamp)rM   rN   rP   rQ   rR   rS   rT   rU   rV   rW   tolistrt   r'   ru   r	   r   rZ   )r   r   memory_items
chunk_dictr   r   rM   embedding_listrx   item_idmemory_items              r    r~   zFileMemory._build_embeddings  s     J +G!*-H NN88A	i. ""** E M MaQS TI""9-!*!1!1!3A!6 "+./q-1H0IJG$ '!())+K ,3 !6 !  EaSIJs   BC	D(DDc                 D   t        j                         }	 t        |d      5 t        fdd      D ]  }|j	                  |        	 ddd       |j                         dd S # 1 sw Y   xY w# t        $ r%}t        j                  d| d|        Y d}~yd}~ww xY w)	zCompute MD5 hash of filerbc                  &     j                  d      S )Ni   )r   )rB   s   r    <lambda>z/FileMemory._compute_file_hash.<locals>.<lambda>  s    !&&,r!   r!   N   zError computing hash for r   ru   )	hashlibmd5r<   iterro   	hexdigestrt   r'   ru   )r   r   hash_md5r   rx   rB   s        @r    r   zFileMemory._compute_file_hash  s    ;;=	i&!!"6<EOOE* = ' %%',, '&  	LL4YKr!EF	s.   A1 &A%
A1 %A.*A1 1	B:BBc                     | j                  ||      }|r-| j                  |      }| j                  j                  |       yy)z4Index a single file (helper for incremental updates)N)r|   r~   r0   r}   )r   r   r4   r   	new_itemss        r    r   zFileMemory._index_file  s>    **9h?..v6IMM  + r!   
file_pathsc                     t        |      }| j                  D cg c]!  }|j                  j                  d      |vr|# c}| _        |D ]  }| j                  j                  |d         yc c}w )z Remove chunks from deleted filesr   N)r   r0   r   rr   r   pop)r   r   file_paths_setr@   r   s        r    r   z#FileMemory._remove_files_from_index  sm    Z "]]
*T}}  -^C ]
 $I""9d3 $
s   &A,)r"   N)r2   )!__name__
__module____qualname____doc__r   r   r-   r+   r   r   intfloatr   r	   rk   r
   ro   r)   r,   boolr*   r   r{   r   r   r   r|   r   r~   r   r   r   __classcell__)r   s   @r    r   r      s]   
;/ ;(&W2AA %A 	A
 $A 
j	AF0 T 6E(,D ,\$"2 $tCy $L.T .tCy .T .5c 5S 5tTXz 5n-( -(S	 -(^'T
 'tJ7G 'R
C 
C 
,S ,C ,d ,449 4 4r!   r   )r   r=   r$   r   loggingpathlibr   typingr   r   r   r   rV   numpyrR   %runtime.node.agent.memory.memory_baser   r   r	   r
   entity.configsr   r   entity.configs.node.memoryr   	getLoggerr   r'   r    r!   r    <module>r      s\     	    " "     ? 7			8	$J4 J4r!   