+
    i5                        R t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RI	t	^ RI
HtHt ^ RIHt ^ RIHtHtHtHtHtHt R^
R^R^(/tR^R^R^/tRt]! 0 R	kR
kRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkR kR!kR"kR#kR$kR%kR&kR'kR(kR)kR*kR+kR,kR-kR.kR/kR0kR1kR2kR3kR4k4      t/ R5R6R70bR8R6R70bR60 R_mbR70 R`mbR9R:0bR:R90bR;R<0bR<R;0bR=R>R?0bR@RARB0bRCRD0bRDRC0bRERF0bRFRE0bRGRH0bRHRG0btRI RJ ltRK RL ltRM RN ltRO RP ltRQ RR lt RaRS RT llt!RU RV lt"RW RX lt#RbRY RZ llt$RaR[ R\ llt%R] R^ lt&R# )cu.  YouTube search and transcript extraction via yt-dlp for /last30days v2.1.

Uses yt-dlp (https://github.com/yt-dlp/yt-dlp) for both YouTube search and
transcript extraction. No API keys needed — just have yt-dlp installed.

Inspired by Peter Steinberger's toolchain approach (yt-dlp + summarize CLI).
N)ThreadPoolExecutoras_completed)Path)AnyDictListOptionalSetTuplequickdefaultdeepi  theaantoforhowisinofonandwithfrombyatthisthatitmyyourimeweyouwhataredocanitsbeornotnosoifbutaboutalljustgethashavewaswillhipraphiphophopjs
javascriptts
typescriptai
artificialintelligencemlmachinelearningreactreactjssveltesveltejsvuevuejsc                F    V ^8  d   QhR\         R\        \         ,          /# )   textreturn)strr	   )formats   "V/Users/bowang/.openclaw/workspace/skills/last30days-official/scripts/lib/youtube_yt.py__annotate__rU   E   s     
 
C 
CH 
    c                P   \         P                  ! RRV P                  4       4      P                  4       pV Uu0 uF#  q"\        9  g   K  \        V4      ^8  g   K!  VkK%  	  pp\        V4      pV F,  pV\        9   g   K  VP                  \        V,          4       K.  	  V# u upi )zLowercase, strip punctuation, remove stopwords, drop single-char tokens.
Expands tokens with synonyms for better cross-domain matching.z[^\w\s] )	resublowersplit	STOPWORDSlensetSYNONYMSupdate)rP   wordswtokensexpandedts   &     rT   	_tokenizerg   E   s     FF:sDJJL1779EDA9"4aQ!aaFD6{H=OOHQK(  O Es   B#
B#B#c                <    V ^8  d   QhR\         R\         R\        /# )rO   querytitlerQ   )rR   float)rS   s   "rT   rU   rU   R   s!     % %c %# %% %rV   c                    \        V 4      p\        V4      pV'       g   R# \        W#,          4      pV\        V4      ,          p\        R\        RV4      4      # )zCompute relevance as ratio of query tokens found in title.

Uses ratio overlap (intersection / query_length) so short queries
score higher when fully represented in the title. Floors at 0.1.
g      ?g?g      ?)rg   r^   maxmin)ri   rj   q_tokenst_tokensoverlapratios   &&    rT   _compute_relevancers   R   sL     HH(%&Gc(m#EsCUO$$rV   c                $    V ^8  d   QhR\         /# )rO   msgrR   )rS   s   "rT   rU   rU   c   s      c rV   c                    \         P                  P                  RV  R24       \         P                  P                  4        R# )zLog to stderr.z
[YouTube] 
N)sysstderrwriteflush)ru   s   &rT   _logr}   c   s-    JJz#b)*JJrV   c                $    V ^8  d   QhR\         /# )rO   rQ   )bool)rS   s   "rT   rU   rU   i   s     . .D .rV   c                 2    \         P                  ! R4      RJ# )z%Check if yt-dlp is available in PATH.yt-dlpN)shutilwhich rV   rT   is_ytdlp_installedr   i   s    <<!--rV   c                0    V ^8  d   QhR\         R\         /# )rO   topicrQ   rv   )rS   s   "rT   rU   rU   n   s     #  #  #  # rV   c                   V P                  4       P                  4       p. ROpV F?  pVP                  VR,           4      '       g   K#  V\        V4      R P                  4       pKA  	  0 RmpVP	                  4       pV Uu. uF  qfV9  g   K  VNK  	  ppV'       d   RP                  V4      MTpVP                  R4      # u upi )zExtract core subject from verbose query for YouTube search.

Strips meta/research words to keep only the core product/concept name,
similar to bird_x.py's approach.
rX   Nz?!.)zwhat are the bestzwhat is the bestzwhat are the latestzwhat are people saying aboutzwhat do people think aboutzhow do i usez
how to usezhow tozwhat arezwhat isztips forzbest practices for>   newtopbestgoodnewsgreatviraladvicekillerlatestpromptra   awesomehottestmethodspopularpromptsupdatesfeaturestrending	practices	prompting
approaches
strategiesrecommendations)r[   strip
startswithr^   r\   joinrstrip)	r   rP   prefixespnoiserb   rc   filteredresults	   &        rT   _extract_core_subjectr   n   s     ;;= DH ??1s7##A=&&(D E JJLE 35aUN5H3#+SXXhF== 4s    CCc                v    V ^8  d   QhR\         R\         R\         R\         R\        \         \        3,          /# rO   r   	from_dateto_datedepthrQ   rR   r   r   )rS   s   "rT   rU   rU      sI     q qqq q 	q
 
#s(^qrV   c                b   \        4       '       g   R. RR/# \        P                  V\        R,          4      p\        V 4      p\	        RV RV RV R24       R	R
V RV 2RRR.p\        \        R4      '       d   \        P                  MRp \        P                  ! V\        P                  \        P                  RVR7      p VP                  ^xR7      w  rT	;'       g    RP1                  4       '       g   \	        R4       R. /# . pT	P1                  4       P3                  R4       EF  pTP1                  4       pT'       g   K   \4        P6                  ! T4      pTP                  RR4      pTP                  R4      ;'       g    ^ pTP                  R4      ;'       g    ^ pTP                  R4      ;'       g    ^ pTP                  RR4      pRpT'       d0   \;        T4      ^8X  d    TR,           R TR!,           R TR",           2pTP=                  R#TR$TP                  R$R4      R%R&T 2R'TP                  R(TP                  R)R4      4      R*TR+R,TR-TR.T/R/TP                  R/4      R0\?        Y]P                  R$R4      4      R1R2TP                  R$T4      R3,           2/	4       EK  	  T Uu. uF&  pTR*,          '       g   K  TR*,          T8  g   K$  TNK(  	  pp\;        T4      ^8  d   Tp\	        R4\;        T4       R524       M$\	        R4\;        T4       R6\;        T4       R724       TPA                  R8 RR97       RT/#   \        P                   d     \        P                  ! \        P                  ! TP                  4      \         P"                  4       M,  \$        \&        \(        3 d    TP+                  4         Mi ; iTP-                  ^R7       \	        R4       R. RR/u # i ; i  \.         d
    R. RR/u # i ; i  \4        P8                   d     EK  i ; iu upi ):zSearch YouTube via yt-dlp. No API key needed.

Args:
    topic: Search topic
    from_date: Start date (YYYY-MM-DD)
    to_date: End date (YYYY-MM-DD)
    depth: 'quick', 'default', or 'deep'

Returns:
    Dict with 'items' list of video metadata dicts.
itemserrorzyt-dlp not installedr   zSearching YouTube for 'z	' (since z, count=)r   ytsearch:z--dump-json--no-warningsz--no-downloadsetsidNTstdoutrz   rP   
preexec_fntimeoutzYouTube search timed out (120s)zSearch timed outzyt-dlp not found z!YouTube search returned 0 resultsrx   id
view_count
like_countcomment_countupload_date:N   N-:r      N:r      Nvideo_idrj   url https://www.youtube.com/watch?v=channel_namechanneluploaderdate
engagementviewslikescommentsduration	relevancewhy_relevantz	YouTube: :N<   NzFound z videos within date rangez	 videos (z  within date range, keeping all)c                 "    V R ,          R,          # )r   r   r   )xs   &rT   <lambda> search_youtube.<locals>.<lambda>  s    Q|_W5rV   )keyreverse)!r   DEPTH_CONFIGr5   r   r}   hasattrosr   
subprocessPopenPIPEcommunicateTimeoutExpiredkillpggetpgidpidsignalSIGTERMProcessLookupErrorPermissionErrorOSErrorkillwaitFileNotFoundErrorr   r\   jsonloadsJSONDecodeErrorr^   appendrs   sort)r   r   r   r   count
core_topiccmdpreexecprocr   rz   r   linevideor   r   r   r   r   date_strr"   recents   &&&&                  rT   search_youtuber      s   " W&<==UL$;<E&u-J":,i	{(5'QR	ST 	
5':,'C #2x00biidG:????
		>!--c-:NF LLb!!01} E$$T*zz|	JJt$E 99T2&YY|,11
YY|,11
		/277aiir2 3{+q0%b/*!K,<+=Q{3?O>PQHUYYw+5hZ@EIIi:r1JKHM
 		*-+J		'28NOi		':(Fs(K'LM
 	) +J GA!F))a&	Y0FaaFG
6{avc%j\!:;<vc%j\3v;-7WXY 
JJ5tJDUA (( 	>		"**TXX.?&A 		IIaI 23R*<==	>  :W&899: ## 		> Hsy   7O8 M	 	P4P,
P,P,	O5AN#"O5#&O	O5O&O52O8 4O55O8 8PPP)(P)c                0    V ^8  d   QhR\         R\         /# )rO   vtt_textrQ   rv   )rS   s   "rT   rU   rU     s     9 9 9 9rV   c                \   \         P                  ! RRV \         P                  R7      p\         P                  ! RRV4      p\         P                  ! RRV4      p\         P                  ! RRV\         P                  R7      pVP	                  4       P                  R4      p\        4       p. pV FG  pVP	                  4       pV'       g   K  Wc9  g   K%  VP                  V4       VP                  V4       KI  	  \         P                  ! RR	R	P                  V4      4      P	                  4       # )
z/Convert VTT subtitle format to clean plaintext.z^WEBVTT.*?\n\nr   )flagsz=\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}.*\nz<[^>]+>z^\d+\s*$rx   z\s+rX   )
rY   rZ   DOTALL	MULTILINEr   r\   r_   addr   r   )r   rP   linesseenuniquer   strippeds   &      rT   
_clean_vttr
    s     66#RCD66RTVX\]D66*b$'D66+r4r||<DJJLt$E5DF::<8,HHXMM(#	 
 66&#sxx/06688rV   c                R    V ^8  d   QhR\         R\         R\        \         ,          /# )rO   r   temp_dirrQ   )rR   r   )rS   s   "rT   rU   rU     s*     A. A.s A.c A.hsm A.rV   c                   RRRRRRRRR	V R
2RV  2.p\        \        R4      '       d   \        P                  MRp \        P                  ! V\        P
                  \        P
                  RVR7      p VP                  ^R7       \'        T4      T  R2,          pTP)                  4       '       g)   \'        T4      P+                  T  R24       F  pTp M	  R#  TP-                  RRR7      p\/        T4      pTP1                  4       p	\3        T	4      \4        8  d    RP7                  T	R\4         4      R,           pT'       d   T# R#   \        P                   d     \        P                  ! \        P                  ! TP                  4      \        P                  4       M,  \        \        \        3 d    TP!                  4         Mi ; iTP#                  ^R7        R# i ; i  \$         d     R# i ; i  \         d     R# i ; i)zFetch auto-generated transcript for a YouTube video.

Args:
    video_id: YouTube video ID
    temp_dir: Temporary directory for subtitle files

Returns:
    Plaintext transcript string, or None if no captions available.
r   z--write-auto-subsz
--sub-langenz--sub-formatvttz--skip-downloadr   z-oz/%(id)sr   r   NTr   r   z.en.vttz*.vttzutf-8replace)encodingerrorsrX   z...)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   existsglob	read_textr
  r\   r^   TRANSCRIPT_MAX_WORDSr   )
r   r  r   r   r   vtt_pathr   raw
transcriptrb   s
   &&        rT   fetch_transcriptr    s    	d
'"
*8*5	C #2x00biidG????
	R( H~8*G 44H??h$$z%78AH 9   ') D CJ E
5z((XXe$9%9:;eC
#:--A (( 			"**TXX.?&A 		IIaI 	    sf   7G' 5E G9 G$AF"!G$"&GG$
GG$ G' #G$$G' 'G65G69HHc          	          V ^8  d   QhR\         \        ,          R\        R\        \        \        \        ,          3,          /# )rO   	video_idsmax_workersrQ   )r   rR   intr   r   )rS   s   "rT   rU   rU   b  s8     " "Cy"" 
#x}
"rV   c                r   V '       g   / # \        R\        V 4       R24       / p\        P                  ! 4       ;_uu_ 4       p\	        VR7      ;_uu_ 4       pV  Uu/ uF  pVP                  \        WS4      VbK  	  pp\        V4       F  pWg,          p VP                  4       W%&   K   	  RRR4       RRR4       \        R VP                  4        4       4      p\        RV R\        V 4       R24       V# u upi   \         d	    RY%&    K  i ; i  + '       g   i     Lp; i  + '       g   i     L{; i)zFetch transcripts for multiple videos in parallel.

Args:
    video_ids: List of YouTube video IDs
    max_workers: Max parallel fetches

Returns:
    Dict mapping video_id to transcript text (or None).
zFetching transcripts for z videos)r  Nc              3   8   "   T F  q'       g   K  ^x  K  	  R# 5i)   Nr   ).0vs   & rT   	<genexpr>-fetch_transcripts_parallel.<locals>.<genexpr>  s     /)AQaa)s   	
zGot transcripts for /)r}   r^   tempfileTemporaryDirectoryr   submitr  r   r   	Exceptionsumvalues)	r  r  resultsr  executorvidfuturesfuturegots	   &&       rT   fetch_transcripts_parallelr3  b  s    	$S^$4G	<=G		$	$	&	&(K88H %$C  0#@#E$   'w/o(#)==?GL 0 9 
' /)/
/CuAc)n%5W	=>N ! (#'GL( 98 
'	&sZ   D&D	 C8<D	C=
'D	+D&8D	=DD	DD	D#D&&D6	c                v    V ^8  d   QhR\         R\         R\         R\         R\        \         \        3,          /# r   r   )rS   s   "rT   rU   rU     sB     # ### # 	#
 
#s(^#rV   c                T   \        WW#4      pVP                  R. 4      pV'       g   V# \        P                  V\        R,          4      pVRV  Uu. uF  qwR,          NK  	  pp\        V4      p	V F,  pVR,          p
V	P                  V
4      pT;'       g    RVR&   K.  	  RV/# u upi )a.  Full YouTube search: find videos, then fetch transcripts for top results.

Args:
    topic: Search topic
    from_date: Start date (YYYY-MM-DD)
    to_date: End date (YYYY-MM-DD)
    depth: 'quick', 'default', or 'deep'

Returns:
    Dict with 'items' list. Each item has a 'transcript_snippet' field.
r   r   Nr   r   transcript_snippet)r   r5   TRANSCRIPT_LIMITSr3  )r   r   r   r   search_resultr   transcript_limititemtop_idstranscriptsr/  r  s   &&&&        rT   search_and_transcriber=    s    $ #5WDMgr*E ),,U4Ei4PQ,12C3C,DE,DDJ,DGE,W5K : __S)
%/%5%52!" 
 U Fs   B%c                    V ^8  d   QhR\         \        \        3,          R\        \         \        \        3,          ,          /# )rO   responserQ   )r   rR   r   r   )rS   s   "rT   rU   rU     s/     % %T#s(^ %T#s(^8L %rV   c                &    V P                  R. 4      # )znParse YouTube search response to normalized format.

Returns:
    List of item dicts ready for normalization.
r   )r5   )r?  s   &rT   parse_youtube_responserA    s     <<$$rV   >   r:   r=   r<   >   r:   r=   r;   )r   )   )'__doc__r   mathr   rY   r   r   r   ry   r'  concurrent.futuresr   r   pathlibr   typingr   r   r   r   r	   r
   r   r7  r  	frozensetr]   r`   rg   rs   r}   r   r   r   r
  r  r3  r=  rA  r   rV   rT   <module>rI     s:     	 	    
  ?  8 8 Rr
B Qq
A     	!#(*.046:<@	!%'-/57;=A     # %+ -2 48 :? 
	 	 	 	 #	 %)	 +/	 16	 8?	
 

 
 
  
 "(
 */
 17 		E8	E8 
# #	
 	<. 4& 	<. 4& 	<
( 	9j
! i[ y zl 
 
G9  eW!(
%".
# Lqh9,A.H"J#L%rV   