+
    ix%                         R t ^ RIt^ RIHtHtHtHt ^ RIHt ^RI	H
t
Ht R R lt ! R R]4      tRR	 R
 lltR R ltRR R lltRR R lltRR R lltRR R lltR# )zReddit thread enrichment with real engagement metrics.

Supports two backends:
1. ScrapeCreators API (preferred) - no rate limits, 1 credit/call
2. reddit.com/.json (fallback) - free but 429-prone
N)AnyDictListOptional)urlparse)httpdatesc                F    V ^8  d   QhR\         R\        \         ,          /# )   urlreturn)strr   )formats   "Y/Users/bowang/.openclaw/workspace/skills/last30days-official/scripts/lib/reddit_enrich.py__annotate__r      s      S Xc]     c                h     \        V 4      pRVP                  9  d   R# VP                  #     R# ; i)zdExtract the path from a Reddit URL.

Args:
    url: Reddit URL

Returns:
    Path component or None
z
reddit.comN)r   netlocpath)r   parseds   & r   extract_reddit_pathr      s3    #v}},{{s   , , 1c                       ] tR t^!tRtRtR# )RedditRateLimitErrorz3Raised when Reddit returns HTTP 429 (rate limited). N)__name__
__module____qualname____firstlineno____doc____static_attributes__r   r   r   r   r   !   s    =r   r   c                    V ^8  d   QhR\         R\        \        ,          R\        R\        R\        \        \         \        3,          ,          /# )r
   r   	mock_datatimeoutretriesr   )r   r   r   intr   )r   s   "r   r   r   &   sK     ! !	!~! ! 	!
 d38n!r   c                    Ve   V# \        V 4      pV'       g   R#  \        P                  ! WBVR7      pV#   \        P                   d,   pTP                  R8X  d   \        RT  24      Th Rp?R# Rp?ii ; i)aC  Fetch Reddit thread JSON data.

Args:
    url: Reddit thread URL
    mock_data: Mock data for testing
    timeout: HTTP timeout per attempt in seconds
    retries: Number of retries on failure

Returns:
    Thread data dict or None on failure

Raises:
    RedditRateLimitError: When Reddit returns 429 (caller should bail)
Nr"   r#   i  z#Reddit rate limited (429) fetching )r   r   get_reddit_json	HTTPErrorstatus_coder   )r   r!   r"   r#   r   dataes   &&&&   r   fetch_thread_datar,   &   su    ( s#D##D7K>> ==C&)LSE'RSYZZs   7 A7 A22A7c                R    V ^8  d   QhR\         R\        \        \         3,          /# )r
   r*   r   )r   r   r   )r   s   "r   r   r   J   s"     6 6C 6DcN 6r   c                \   RRR. /p\        V \        4      '       d   \        V 4      ^8  d   V# V ^ ,          p\        V\        4      '       d   VP	                  R/ 4      P	                  R. 4      pV'       d   V^ ,          P	                  R/ 4      pRVP	                  R4      RVP	                  R4      RVP	                  R4      R	VP	                  R	4      R
VP	                  R
4      RVP	                  R4      RVP	                  RR4      R,          /VR&   \        V 4      ^8  Ed   V ^,          p\        V\        4      '       d   VP	                  R/ 4      P	                  R. 4      pV F  pVP	                  R4      R8w  d   K  VP	                  R/ 4      pVP	                  R4      '       g   KF  RVP	                  R^ 4      R	VP	                  R	4      RVP	                  RR4      RVP	                  RR4      R,          R
VP	                  R
4      /pVR,          P                  V4       K  	  V# )zParse Reddit thread JSON into structured data.

Args:
    data: Raw Reddit JSON response

Returns:
    Dict with submission and comments data

submissionNcommentsr*   childrenscorenum_commentsupvote_ratiocreated_utc	permalinktitleselftext :Ni  Nkindt1bodyauthor	[deleted]Ni,  N)
isinstancelistlendictgetappend)	r*   resultsubmission_listingr1   sub_datacomments_listingchildc_datacomments	   &        r   parse_thread_datarM   J   s    	dBF
 dD!!SY] a$d++%))&"599*bI{vr2Hg.^ <^ <x||M:X\\+6g.HLLR8>$F<  4yA~7&--'++FB7;;JKH!99V$,62.zz&)) VZZ3!6::m#<fjj;?FJJvr248K!8 z"))'2 "  Mr   c          	          V ^8  d   QhR\         \        ,          R\        R\         \        \        \        3,          ,          /# r
   r0   limitr   )r   r   r$   r   r   )r   s   "r   r   r      s2     # #tDz ## #tDcN?S #r   c                    V  Uu. uF  q"P                  R4      R9  g   K  VNK  	  pp\        VR RR7      pVRV # u upi )zGet top comments sorted by score.

Args:
    comments: List of comment dicts
    limit: Maximum number to return

Returns:
    Top comments sorted by score
r=   c                 &    V P                  R ^ 4      # r2   rD   cs   &r   <lambda>"get_top_comments.<locals>.<lambda>   s    !%%2Cr   TkeyreverseNr>   z	[removed])rD   sorted)r0   rP   rV   validsorted_commentss   &&   r   get_top_commentsr`      sL     !V1EE(O;U$UQQEV U(CTRO6E"" Ws   ==c                h    V ^8  d   QhR\         \        ,          R\        R\         \        ,          /# rO   )r   r   r$   r   )r   s   "r   r   r      s)     / /tDz /# /d3i /r   c                B  a . pV RV^,            EF
  pVP                  RR4      P                  4       oS'       d   \        S4      ^8  d   K>  . R
Op\        ;QJ d    V3R lV 4       F  '       g   K   RM	  RM! V3R lV 4       4      '       d   K  SR,          p\        S4      ^8  dJ   \	        V4       F#  w  rgVR9   g   K  V^28  g   K  VRV^,            p M	  VP                  4       R	,           pVP                  V4       \        V4      V8  g   EK
   V# 	  V# )a%  Extract key insights from top comments.

Uses simple heuristics to identify valuable comments:
- Has substantive text
- Contains actionable information
- Not just agreement/disagreement

Args:
    comments: Top comments
    limit: Max insights to extract

Returns:
    List of insight strings
Nr<   r9   c              3   n   <"   T F*  p\         P                  ! VSP                  4       4      x  K,  	  R # 5i)N)rematchlower).0pr<   s   & r   	<genexpr>+extract_comment_insights.<locals>.<genexpr>   s%     @-Qrxx4::<((-s   25TF:N   Nz.!?z...)z@^(this|same|agreed|exactly|yep|nope|yes|no|thanks|thank you)\.?$z^lol|lmao|hahaz^\[deleted\]z^\[removed\])rD   striprB   any	enumeraterstriprE   )	r0   rP   insightsrL   skip_patternsinsighticharr<   s	   &&      @r   extract_comment_insightsru      s     HJUQY'{{62&,,.s4y2~
 3@-@333@-@@@ t*t9s?$W-5=QV%dqsmG .
 "..*U2 x=E!O= (< Or   c                    V ^8  d   QhR\         \        \        3,          R\        \         ,          R\        R\        R\         \        \        3,          /# )r
   itemmock_thread_datar"   r#   r   )r   r   r   r   r$   )r   s   "r   r   r      sN     = =
sCx.=tn= = 	=
 
#s(^=r   c                    V P                  RR4      p\        WAW#R7      pV'       g   V # \        V4      pVP                  R4      pVP                  R. 4      pV'       dk   RVP                  R4      RVP                  R4      RVP                  R4      /V R	&   VP                  R
4      p	V	'       d   \        P                  ! V	4      V R&   \        V4      p
. V R&   V
 F  pVP                  RR4      pV'       d   RV 2MRpV R,          P                  RVP                  R^ 4      R\        P                  ! VP                  R
4      4      RVP                  RR4      RVP                  RR4      R,          RV/4       K  	  \        V
4      V R&   V # )u~  Enrich a Reddit item with real engagement data.

Args:
    item: Reddit item dict
    mock_thread_data: Mock data for testing
    timeout: HTTP timeout per attempt (default 10s for enrichment)
    retries: Number of retries (default 1 — fail fast for enrichment)

Returns:
    Enriched item dict

Raises:
    RedditRateLimitError: Propagated so caller can bail on remaining items
r   r9   r&   r/   r0   r2   r3   r4   
engagementr5   datetop_commentsr6   https://reddit.comr=   excerptr<   N   Ncomment_insights)rD   r,   rM   r   timestamp_to_dater`   rE   ru   )rw   rx   r"   r#   r   thread_datar   r/   r0   r5   r|   rV   r6   comment_urls   &&&&          r   enrich_reddit_itemr      sw   ( ((5"
C $C7\K{+FL)Jzz*b)H Z^^G,JNN>:JNN>:
\ !nn]3 22;?DL $H-LDEE+r*	:C*9+6^##QUU7A&E++AEE-,@AaeeHb)quuVR(.;%
 	   8ED	Kr   c          
          V ^8  d   QhR\         \        \        3,          R\        R\        R\         \        \        3,          /# )r
   rw   tokenr"   r   )r   r   r   r$   )r   s   "r   r   r     s@     = =
sCx.== = 
#s(^	=r   c                   ^RI Hp V P                  RR4      pV'       g   V # VP                  WA4      pV'       g   V # . pVR,           F  pVP                  RR4      pV'       d   VR9   d   K&  VP                  R4      ;'       g    VP                  R^ 4      p	VP                  R	R4      p
VP                  R
R4      pV'       d   RV 2MRpTP	                  RT	RVP                  R4      '       d&   \
        P                  ! VP                  R4      4      MRR	V
RVR,          RVR,          RV/4       K  	  VP                  R RR7       . V R&   V Ft  pV R,          P	                  RVP                  R^ 4      RVP                  R4      R	VP                  R	R4      RVP                  RR4      RVP                  RR4      /4       Kv  	  \        V4      V R&   V # )a*  Enrich a Reddit item using ScrapeCreators comment API.

No rate limit risk. Uses 1 credit per call.

Args:
    item: Reddit item dict (already has engagement from search)
    token: ScrapeCreators API key
    timeout: HTTP timeout

Returns:
    Enriched item with top_comments and comment_insights
)redditr   r9   :N
   Nr<   r>   upsr2   r=   r6   r}   r{   r5   Nr?   r~   r   c                 &    V P                  R ^ 4      # rS   rT   rU   s   &r   rW   'enrich_reddit_item_sc.<locals>.<lambda>7  s    AEE'1$5r   TrY   r|   r   r\   )	r9   r   rD   fetch_post_commentsrE   r   r   sortru   )rw   r   r"   
reddit_modr   raw_commentsr|   rV   r<   r2   r=   r6   r   s   &&&          r   enrich_reddit_item_scr     s   " '
((5"
C11#=LL#uuVR t99e11gq 1x-EE+r*	:C*9+6UQUU=EYEYE++AEE-,@A_cfDJtDz;
 	 & 5tDD^##QUU7A&AEE&MaeeHb)quuY+155#%
 	   8ED	Kr   )N      )r   )   )Nr      )r   )r   rd   typingr   r   r   r   urllib.parser   r9   r   r   r   	Exceptionr   r,   rM   r`   ru   r   r   r   r   r   <module>r      sS    
 , , ! $	9 	
!H6r#&/d=@= =r   