+
    i-                     L   R t ^ RIt^ RIHtHt ^ RIHtHtHtHtH	t	 ^ RI
Ht ^RIHt / R^bR^bR^bR	^bR
^bR^bR^bR^bR^bR^bR^bR^bR^bR^bR^bR^	bR^	bR^	R^
R^
R^R^R^R^/CtR R ltR  R! ltR" R# lt0 R.mtR$ R% ltR& R' ltR/R( R) lltR* R+ ltR, R- ltR# )0a  WebSearch module for last30days skill.

NOTE: WebSearch uses the assistant's built-in web search tool, which runs inside the host environment.
Unlike Reddit/X which use external APIs, web search results are obtained by the assistant
directly and passed to this module for normalization and scoring.

The typical flow is:
1. The assistant invokes its web search tool with the topic
2. The assistant passes results to parse_websearch_results()
3. Results are normalized into WebSearchItem objects
N)datetime	timedelta)AnyDictListOptionalTuple)urlparse)schemajanjanuaryfebfebruarymarmarchapraprilmayjunjunejuljulyaugaugustsepsept	septemberoctoctobernovnovemberdecdecemberc                F    V ^8  d   QhR\         R\        \         ,          /#    urlreturnstrr   )formats   "U/Users/bowang/.openclaw/workspace/skills/last30days-official/scripts/lib/websearch.py__annotate__r,   &   s     # #s #x} #    c                Z   \         P                  ! RV 4      pV'       do   VP                  4       w  r#pR\        V4      u;8:  d   R8:  dD   M M@^\        V4      u;8:  d   ^8:  d)   M M%^\        V4      u;8:  d   ^8:  d   M M
V RV RV 2# \         P                  ! RV 4      pV'       do   VP                  4       w  r#pR\        V4      u;8:  d   R8:  dD   M M@^\        V4      u;8:  d   ^8:  d)   M M%^\        V4      u;8:  d   ^8:  d   M M
V RV RV 2# \         P                  ! RV 4      pV'       dr   VP                  4       w  r#pR\        V4      u;8:  d   R8:  dG   M R# ^\        V4      u;8:  d   ^8:  d+   M R# ^\        V4      u;8:  d   ^8:  d   M R# V RV RV 2# R# )zTry to extract a date from URL path.

Many sites embed dates in URLs like:
- /2026/01/24/article-title
- /2026-01-24/article
- /blog/20260124/title

Args:
    url: URL to parse

Returns:
    Date string in YYYY-MM-DD format, or None
z/(\d{4})/(\d{2})/(\d{2})/    -z/(\d{4})-(\d{2})-(\d{2})[-/]z/(\d{4})(\d{2})(\d{2})/N)researchgroupsint)r&   matchyearmonthdays   &    r+   extract_date_from_urlr:   &   sk    II2C8E <<>S3t9$$c%j)>B)>1CCVTVCVV1UG1SE** II5s;E <<>S3t9$$c%j)>B)>1CCVTVCVV1UG1SE** II0#6E <<>S3t9$$  *+c%j)>B)>  DECCVTVCV  V1UG1SE**r-   c                F    V ^8  d   QhR\         R\        \         ,          /# )r%   textr'   r(   )r*   s   "r+   r,   r,   L   s"     W WC WHSM Wr-   c                \   V '       g   R# V P                  4       p\        P                  ! RV4      pV'       d   VP                  4       w  r4p\        P                  VR,          4      pV'       dL   R\        V4      u;8:  d   R8:  d4   M M0^\        V4      u;8:  d   ^8:  d   M MV RVR R\        V4      R 2# \        P                  ! RV4      pV'       d   VP                  4       w  rCp\        P                  VR,          4      pV'       dL   R\        V4      u;8:  d   R8:  d4   M M0^\        V4      u;8:  d   ^8:  d   M MV RVR R\        V4      R 2# \        P                  ! R	V 4      pV'       do   VP                  4       w  rVpR\        V4      u;8:  d   R8:  dD   M M@^\        V4      u;8:  d   ^8:  d)   M M%^\        V4      u;8:  d   ^8:  d   M M
V RV RV 2# \        P                  ! 4       pR
V9   d%   V\        ^R7      ,
          pVP                  R4      # RV9   d   VP                  R4      # \        P                  ! RV4      pV'       dF   \        VP                  ^4      4      p	V	^<8:  d%   V\        V	R7      ,
          pVP                  R4      # \        P                  ! RV4      pV'       d   VP                  R4      # RV9   d%   V\        ^R7      ,
          pVP                  R4      # RV9   d%   V\        ^R7      ,
          pVP                  R4      # R# )a  Try to extract a date from text snippet or title.

Looks for patterns like:
- January 24, 2026 or Jan 24, 2026
- 24 January 2026
- 2026-01-24
- "3 days ago", "yesterday", "last week"

Args:
    text: Text to parse

Returns:
    Date string in YYYY-MM-DD format, or None
Nz\b(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+(\d{1,2})(?:st|nd|rd|th)?,?\s*(\d{4})\b:N   Nr/   r0   r1   02dz\b(\d{1,2})(?:st|nd|rd|th)?\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+(\d{4})\bz\b(\d{4})-(\d{2})-(\d{2})\b	yesterday)daysz%Y-%m-%dtodayz\b(\d+)\s*days?\s*ago\bz\b(\d+)\s*hours?\s*ago\bz	last weekz	this week)lowerr2   r3   r4   	MONTH_MAPgetr5   r   nowr   strftimegroup)
r<   
text_lowerr6   	month_strr9   r7   r8   rB   daterA   s
   &         r+   extract_date_from_snippetrL   L   s    J II	6 		E $||~	im,TSY.$.1C3FB3FV1U3KqS#77 II	 	E $||~im,TSY.$.1C3FB3FV1U3KqS#77 II4d;E <<>S3t9$$c%j)>B)>1CCVTVCVV1UG1SE** LLNEj ya((}}Z((*~~j)) II0*=E5;;q>"2:9$//D==,, II1:>E~~j)) j ya((}}Z(( j ya((}}Z((r-   c          
          V ^8  d   QhR\         R\         R\         R\        \        \         ,          \         3,          /# )r%   r&   snippettitler'   )r)   r   r   )r*   s   "r+   r,   r,      s>     # #	## # 8C=#	#r-   c                    \        V 4      pV'       d   VR3# \        V4      pV'       d   VR3# \        V4      pV'       d   VR3# R# )am  Extract date from any available signal.

Tries URL first (most reliable), then snippet, then title.

Args:
    url: Page URL
    snippet: Page snippet/description
    title: Page title

Returns:
    Tuple of (date_string, confidence)
    - date from URL: 'high' confidence
    - date from snippet/title: 'med' confidence
    - no date found: None, 'low' confidence
highmed)Nlow)r:   rL   )r&   rN   rO   url_datesnippet_date
title_dates   &&&   r+   extract_date_signalsrW      sV    * %S)H -W5LU"" +51J5  r-   c                0    V ^8  d   QhR\         R\         /# r$   )r)   )r*   s   "r+   r,   r,      s        r-   c                     \        V 4      pVP                  P                  4       pVP                  R4      '       d
   VR,          pV#   \         d     R# i ; i)ziExtract the domain from a URL.

Args:
    url: Full URL

Returns:
    Domain string (e.g., "medium.com")
zwww.:   NN )r	   netlocrC   
startswith	Exceptionr&   parseddomains   &  r+   extract_domainrb      sS    #$$&V$$BZF s   AA	 	AAc                0    V ^8  d   QhR\         R\        /# r$   )r)   bool)r*   s   "r+   r,   r,      s      C D r-   c                     \        V 4      pVP                  P                  4       pV\        9   #   \         d     R# i ; i)z~Check if URL is from an excluded domain (Reddit/X).

Args:
    url: URL to check

Returns:
    True if URL should be excluded
F)r	   r\   rC   EXCLUDED_DOMAINSr^   r_   s   &  r+   is_excluded_domainrg      s@    #$$&))) s   -0 ??c                    V ^8  d   QhR\         \        \        \        3,          ,          R\        R\        R\        R\         \        \        \        3,          ,          /# )r%   resultstopic	from_dateto_dater'   )r   r   r)   r   )r*   s   "r+   r,   r,      s[     Y Y$sCx.!YY Y 	Y
 
$sCx.Yr-   c                <   . p\        V 4       EF  w  rV\        V\        4      '       g   K  VP                  RR4      pV'       g   K:  \	        V4      '       d   KM  \        VP                  RR4      4      P                  4       p\        VP                  RVP                  RR4      4      4      P                  4       p	V'       g   V	'       g   K  VP                  R4      p
RpV
'       d*   \        P                  ! R\        V
4      4      '       d   R	pM\        WyV4      w  rV'       d   Tp
TpV
'       d   V'       d
   W8  d   EK8  V
'       d   V'       d
   W8  d   EKQ  VP                  R
R4      p \        R\        R\        V4      4      4      pRRV^,            2RVR,          RVR\        V4      RV	R,          RV
RVR
VR\        VP                  RR4      4      P                  4       /	pVP!                  V4       EK  	  V#   \        \        3 d    Rp Li ; i)a  Parse WebSearch results into normalized format.

This function expects results from Claude's WebSearch tool.
Each result should have: title, url, snippet, and optionally date/relevance.

Uses "Date Detective" approach:
1. Extract dates from URLs (high confidence)
2. Extract dates from snippets/titles (med confidence)
3. Hard filter: exclude items with verified old dates
4. Keep items with no date signals (with low confidence penalty)

Args:
    results: List of WebSearch result dicts
    topic: Original search topic (for context)
    from_date: Start date for filtering (YYYY-MM-DD)
    to_date: End date for filtering (YYYY-MM-DD)

Returns:
    List of normalized item dicts ready for WebSearchItem creation
r&   r[   rO   rN   descriptionrK   rS   z^\d{4}-\d{2}-\d{2}$rR   	relevance      ?g      ?g        idW:N   Nsource_domain:Ni  Ndate_confidencewhy_relevant)	enumerate
isinstancedictrE   rg   r)   stripr2   r6   rW   minmaxfloat	TypeError
ValueErrorrb   append)ri   rj   rk   rl   itemsiresultr&   rO   rN   rK   ru   extracted_date
confidencero   items   &&&&            r+   parse_websearch_resultsr      s   4 Ew'	&$''jj# c""FJJw+,224fjjFJJ}b,IJKQQSW zz&!BHH3SY??#O *>cE)R&N%", I$"2 G JJ{C0		CS%	*:!;<I
 AacU)U4[3^C0wt}DC

>2 >?EEG

 	Tw (z L# :& 	I	s   2HHHc          	          V ^8  d   QhR\         \        \        \        3,          ,          R\        R\        R\         \        P
                  ,          /# )r%   r   rk   rl   r'   )r   r   r)   r   r
   WebSearchItem)r*   s   "r+   r,   r,   Z  sH      S#X  
&

	r-   c                @   . pV  F  p\         P                  ! VR,          VR,          VR,          VR,          VR,          VP                  R4      VP                  RR4      VP                  R	R
4      VP                  RR4      R7	      pVP                  V4       K  	  V# )zConvert parsed dicts to WebSearchItem objects.

Args:
    items: List of parsed item dicts
    from_date: Start of date range (YYYY-MM-DD)
    to_date: End of date range (YYYY-MM-DD)

Returns:
    List of WebSearchItem objects
rq   rO   r&   rt   rN   rK   ru   rS   ro   rp   rv   r[   )	rq   rO   r&   rt   rN   rK   ru   ro   rv   )r
   r   rE   r   )r   rk   rl   r   r   web_items   &&&   r+   normalize_websearch_itemsr   Z  s     F''Dzw-U/O&! HH%6>hh{C0."5

 	h  Mr-   c                    V ^8  d   QhR\         \        P                  ,          R\         \        P                  ,          /# )r%   r   r'   )r   r
   r   )r*   s   "r+   r,   r,   |  s0      D!5!56 4@T@T;U r-   c                    \        4       p. pV  FV  pVP                  P                  4       P                  R4      pWA9  g   K4  VP	                  V4       VP                  V4       KX  	  V# )zRemove duplicate WebSearch items.

Deduplication is based on URL.

Args:
    items: List of WebSearchItem objects

Returns:
    Deduplicated list
/)setr&   rC   rstripaddr   )r   	seen_urlsr   r   url_keys   &    r+   dedupe_websearchr   |  sZ     IF((.."))#.#MM'"MM$  Mr-   >   x.com	www.x.com
reddit.comtwitter.comold.reddit.comwww.reddit.comwww.twitter.commobile.twitter.com)r[   r[   )__doc__r2   r   r   typingr   r   r   r   r   urllib.parser	   r[   r
   rD   r:   rL   rW   rf   rb   rg   r   r   r    r-   r+   <module>r      sG  
 
 ( 3 3 ! 	1	1! 
1 q 
1	 q	
 
1 
1 a 
1 a 
1  
1 a %a	2y"	2z2	2z2	 #LWt#N	 ("YxDr-   