
    ij%                     F   d Z ddlZddlZddlmZmZ ddlmZmZm	Z	 ddl
mZmZmZ ddlmZmZ  G d de      Z G d	 d
e      Z G d de      Z G d de      Z	 	 d#dddddddede	e   de	e   deez  dz  dededededefdZdddefdZd ee   d!edeee      fd"Zy)$zwSplit strategies for dynamic node execution.

Provides different methods to split input messages into execution units.
    N)ABCabstractmethod)ListAnyOptional)SplitConfigRegexSplitConfigJsonPathSplitConfig)MessageMessageRolec                   <    e Zd ZdZedee   deee      fd       Zy)Splitterz(Abstract base class for input splitters.inputsreturnc                      y)zSplit inputs into execution units.
        
        Args:
            inputs: Input messages to split
            
        Returns:
            List of message groups, each group is one execution unit
        N )selfr   s     B/Users/bowang/.openclaw/workspace/ChatDev/runtime/node/splitter.pysplitzSplitter.split   s     	    N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s2    2	DM 	d4=.A 	 	r   r   c                   2    e Zd ZdZdee   deee      fdZy)MessageSplitterz;Split by message - each message becomes one execution unit.r   r   c                 .    |D cg c]  }|g c}S c c}w )z+Each input message becomes a separate unit.r   )r   r   msgs      r   r   zMessageSplitter.split"   s    !'(#(((s   
N)r   r   r   r   r   r   r   r   r   r   r   r      s%    E)DM )d4=.A )r   r   c                   l    e Zd ZdZdddddddedeez  dz  d	ed
ededefdZdee	   deee	      fdZ
y)RegexSplitterzSplit by regex pattern matches.NTFpassgroupcase_sensitive	multilinedotallon_no_matchpatternr#   r$   r%   r&   r'   c                    d}|s|t         j                  z  }|r|t         j                  z  }|r|t         j                  z  }t        j                  ||      | _        || _        || _        y)a  Initialize with regex pattern and options.
        
        Args:
            pattern: Regex pattern to match
            group: Capture group name or index. Defaults to entire match (0).
            case_sensitive: Whether the regex should be case sensitive.
            multiline: Enable multiline mode (re.MULTILINE).
            dotall: Enable dotall mode (re.DOTALL).
            on_no_match: Behavior when no match is found ('pass' or 'empty').
        r   N)re
IGNORECASE	MULTILINEDOTALLcompiler(   r#   r'   )r   r(   r#   r$   r%   r&   r'   flagss           r   __init__zRegexSplitter.__init__*   s]    ( R]]"ER\\!ERYYEzz'51
&r   r   r   c           	         g }|D ]%  }|j                         }t        | j                  j                  |            }|sm| j                  dk(  r|j                  |g       nJ| j                  dk(  r;t        |j                  di |j                  ddd      }|j                  |g       |D ]z  }| j                  	 |j                  | j                        }n|j                  d      }|d}t        |j                  |i |j                  d	di      }|j                  |g       | ( |r|S |D cg c]  }|g c}S # t        t        j                  f$ r |j                  d      }Y w xY wc c}w )
z5Split by finding all regex matches across all inputs.r!   empty regexT)split_sourcesplit_no_matchrolecontentmetadatar   r5   )text_contentlistr(   finditerr'   appendr   r8   r:   r#   
IndexErrorr*   error)	r   r   unitsr   textmatchesunit_msgmatch
match_texts	            r   r   zRegexSplitter.splitJ   sj   %'C##%D 4<<0067G##v-LL#'%%0& XX "!bCLL!b']a!b H
 LL(, ::)4%*[[%<
 "'QJ%!#J"&FFngF
 hZ(% !) P u<V#<VcSEV#<< '1 4%*[[^
4 $=s   >E6
E3*E0/E0)r   r   r   r   strintboolr0   r   r   r   r   r   r   r    r    '   s    ) #'#!'' Sy4	'
 ' ' ' '@,=DM ,=d4=.A ,=r   r    c                   T    e Zd ZdZdefdZdedee   fdZdee	   deee	      fdZ
y	)
JsonPathSplitterz$Split by JSON array path extraction.	json_pathc                     || _         y)zInitialize with JSON path.
        
        Args:
            json_path: Simple dot-notation path to array (e.g., 'items', 'data.results')
        N)rL   )r   rL   s     r   r0   zJsonPathSplitter.__init__|   s     #r   datar   c                    | j                   st        |t              r|S |gS | j                   j                  d      }|}|D ]o  }t        |t              r|j                  |      }nEt        |t              r1|j                         r!t        |      }|t        |      k  r||   nd}ng c S |mg c S  t        |t              r|S |gS )z7Extract array from data using simple dot notation path..N)	rL   
isinstancer<   r   dictgetisdigitrH   len)r   rN   partscurrentpartidxs         r   _extract_arrayzJsonPathSplitter._extract_array   s    ~~$%6M$$S)D'4(!++d+GT*t||~$i*-G*<'#,$		  gt$Nyr   r   c           	          g }|D ]  }|j                         }	 t        j                  |      }| j                  |      }|D ]u  }t	        |t
        t        f      rt        j                  |d      }nt        |      }t        |j                  |i |j                  ddi      }	|j                  |	g       w  |r|S |D cg c]  }|g c}S # t        j                  $ r |j                  |g       Y w xY wc c}w )z2Split by extracting array items from JSON content.F)ensure_asciir5   rL   r7   )r;   jsonloadsrZ   rQ   rR   r<   dumpsrG   r   r8   r:   r>   JSONDecodeError)
r   r   rA   r   rB   rN   itemsitemr9   rD   s
             r   r   zJsonPathSplitter.split   s    %'C##%D$zz$'++D1!D!$t5"&**T"F"%d)& XX '!NCLL!N.+!N H
 LL(, " 2 u<V#<VcSEV#<<	 '' $cU#$ $=s   B C
C;%C87C8N)r   r   r   r   rG   r0   r   r   rZ   r   r   r   r   r   rK   rK   y   sH    .## #3 49 4=DM =d4=.A =r   rK   TFr!   r"   
split_typer(   rL   r#   r$   r%   r&   r'   r   c                    | dk(  r
t               S | dk(  r|st        d      t        ||||||      S | dk(  r|st        d      t        |      S t        d|        )a  Factory function to create appropriate splitter.
    
    Args:
        split_type: One of 'message', 'regex', 'json_path'
        pattern: Regex pattern (required for 'regex' type)
        json_path: JSON path (required for 'json_path' type)
        group: Capture group for regex (optional)
        case_sensitive: Case sensitivity for regex (default True)
        multiline: Multiline mode for regex (default False)
        dotall: Dotall mode for regex (default False)
        on_no_match: Behavior when no regex match ('pass' or 'empty')
        
    Returns:
        Configured Splitter instance
        
    Raises:
        ValueError: If required arguments are missing
    messager4   z*regex splitter requires 'pattern' argumentr"   rL   z0json_path splitter requires 'json_path' argumentUnknown split type: )r   
ValueErrorr    rK   )rc   r(   rL   r#   r$   r%   r&   r'   s           r   create_splitterrh      s    : Y  	w	IJJ)#
 	
 
{	"OPP	**/
|<==r   split_configr   c                    | j                   dk(  r
t               S | j                   dk(  ro| j                  t              }|st	        d      t        |j                  |j                  |j                  |j                  |j                  |j                        S | j                   dk(  r7| j                  t              }|st	        d      t        |j                        S t	        d| j                          )zCreate a splitter from a SplitConfig object.
    
    Args:
        split_config: The split configuration
        
    Returns:
        Configured Splitter instance
    re   r4   z!Invalid regex split configurationr"   rL   z%Invalid json_path split configurationrf   )typer   as_split_configr	   rg   r    r(   r#   r$   r%   r&   r'   r
   rK   rL   )ri   regex_configjson_configs      r   create_splitter_from_configro      s     I%  			g	%#334DE@AA  $$'66",,&&$00
 	
 
		k	)"223FGDEE 5 566/0A0A/BCDDr   messages
group_sizec                 v    | sg S g }t        dt        |       |      D ]  }|j                  | |||z            |S )zGroup messages into batches for tree reduction.
    
    Args:
        messages: Messages to group
        group_size: Target size per group
        
    Returns:
        List of message groups. Last group may have fewer items.
    r   )rangerU   r>   )rp   rq   groupsis       r   group_messagesrv     sF     	"$F1c(mZ0hqZ01 1 Mr   )NN)r   r]   r*   abcr   r   typingr   r   r   entity.configs.dynamic_baser   r	   r
   entity.messagesr   r   r   r   r    rK   rG   rH   rI   rh   ro   rv   r   r   r   <module>r{      s#  
  	 # & & Z Z 0s  )h )O=H O=dB=x B=N "#/>
 #/>/>c]/> }/>
 9t/> /> /> /> /> />dEm E E@T']  T']@S r   