+
    im                     n    R t ^ RIt^ RIHt ^ RIHtHtHt 0 RmtRR R llt	R R lt
R R	 ltR
 R ltR# )zHEntity extraction from Phase 1 search results for supplemental searches.N)Counter)AnyDictListc                   V ^8  d   QhR\         \        \        \        3,          ,          R\         \        \        \        3,          ,          R\        R\        R\        R\        \        \         \        ,          3,          /# )   reddit_itemsx_itemsmax_handlesmax_hashtagsmax_subredditsreturn)r   r   strr   int)formats   "Z/Users/bowang/.openclaw/workspace/skills/last30days-official/scripts/lib/entity_extract.py__annotate__r      sk      tCH~&$sCx.!  	
  
#tCy.    c                f    \        V4      p\        V4      p\        V 4      pRVRV RVRV RVRV /# )a  Extract key entities from Phase 1 results for supplemental searches.

Parses X results for @handles and #hashtags, Reddit results for subreddit
names and cross-referenced communities.

Args:
    reddit_items: Raw Reddit item dicts from Phase 1
    x_items: Raw X item dicts from Phase 1
    max_handles: Maximum handles to return
    max_hashtags: Maximum hashtags to return
    max_subreddits: Maximum subreddits to return

Returns:
    Dict with keys: x_handles, x_hashtags, reddit_subreddits
	x_handlesN
x_hashtagsreddit_subreddits)_extract_x_handles_extract_x_hashtags_extract_subreddits)r   r	   r
   r   r   handleshashtags
subredditss   &&&&&   r   extract_entitiesr      sR    , !)G"7+H$\2J 	W\k*h}-Z8 r   c                ~    V ^8  d   QhR\         \        \        \        3,          ,          R\         \        ,          /# r   r	   r   r   r   r   r   )r   s   "r   r   r   2   s+     7 7T#s(^ 4 7c 7r   c                
   \        4       pV  F  pVP                  RR4      P                  4       P                  R4      P	                  4       pV'       d    V\
        9  d   W;;,          ^,          uu&   VP                  RR4      p\        P                  ! RV4      pV F4  pVP	                  4       pV\
        9  g   K   W;;,          ^,          uu&   K6  	  K  	  VP                  4        UU	u. uF  w  rVNK	  	  up	p# u up	pi )zExtract and rank @handles from X results.

Sources handles from:
1. author_handle field (who posted)
2. @mentions in post text (who they're talking about/to)

Returns handles ranked by frequency, filtered for generic accounts.
author_handle @textz@(\w{1,15}))	r   getstriplstriplowerGENERIC_HANDLESrefindallmost_common)
r	   handle_countsitemauthorr&   mentionsmentionmention_lowerh_s
   &         r   r   r   2   s     IM/2.446==cBHHJfO3!Q&! xx#::nd3G#MMOMO3,1,    (33565$!A5666s   .C?c                ~    V ^8  d   QhR\         \        \        \        3,          ,          R\         \        ,          /# r    r!   )r   s   "r   r   r   O   s+     > >d38n!5 >$s) >r   c                *   \        4       pV  FW  pVP                  RR4      p\        P                  ! RV4      pV F%  pWP	                  4       ;;,          ^,          uu&   K'  	  KY  	  VP                  4        UUu. uF
  w  rgRV 2NK  	  upp# u uppi )zRExtract and rank #hashtags from X results.

Returns hashtags ranked by frequency.
r&   r$   z#(\w{2,30})#)r   r'   r,   r-   r*   r.   )r	   hashtag_countsr0   r&   tagstagtr6   s   &       r   r   r   O   s    
 YNxx#zz.$/C99;'1,'   !/ : : <= <asG <===s   ;Bc                ~    V ^8  d   QhR\         \        \        \        3,          ,          R\         \        ,          /# )r   r   r   r!   )r   s   "r   r   r   `   s+     8 8d4S>&: 8tCy 8r   c                j   \        4       pV  F  pVP                  RR4      P                  4       P                  R4      pV'       d   W;;,          ^,          uu&   VP                  R. 4       F7  p\        P
                  ! RV4      pV F  pW;;,          ^,          uu&   K  	  K9  	  VP                  R. 4       FI  pVP                  RR4      p\        P
                  ! RV4      pV F  pW;;,          ^,          uu&   K  	  KK  	  K  	  VP                  4        UU	u. uF  w  r9VNK	  	  up	p# u up	pi )zExtract and rank subreddits from Reddit results.

Sources from:
1. subreddit field on each result
2. Cross-references in comment text (e.g., "check out r/localLLaMA")

Returns subreddits ranked by frequency.
	subredditr$   zr/comment_insightszr/(\w{2,30})top_commentsexcerpt)r   r'   r(   r)   r,   r-   r.   )
r   
sub_countsr0   subinsight
cross_refsrefcommentrC   r6   s
   &         r   r   r   `   s     Jhh{B'--/66t<Oq O xx 2B7GOW=J!1$ " 8 xx3Gkk)R0GOW=J!1$ " 4 ( )44676FCC6777s   D/>   xbbccnnjackmetaapplegithubgoogleopenairedditnytimesreuterstwitteryoutubeelonmuskverified	microsoft	wikipediasundarpichaiwashingtonpost)      r^   )__doc__r,   collectionsr   typingr   r   r   r+   r   r   r   r    r   r   <module>rd      s0    N 	  " "B7:>"8r   