前回の続きでYouTubeの動画から字幕テキストを取得するときのメモ。
import csv
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi

# URL path prefixes whose following segment is the video ID,
# e.g. https://www.youtube.com/shorts/<id>.
_ID_PATH_PREFIXES = ('embed', 'shorts', 'live', 'v')


def _extract_video_id(video_url):
    """Return the video ID contained in a YouTube URL.

    Recognised forms are ``...?v=<id>`` (in any query position),
    ``youtu.be/<id>`` and ``/embed/<id>``-style paths. Anything else falls
    back to the simple ``v=`` split, so a bare video ID is returned as is.
    """
    url = video_url.strip()
    # Without a scheme urlparse() treats the host as part of the path;
    # a leading '//' makes it parse the host as the network location.
    parsed = urlparse(url if '://' in url else '//' + url)

    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or '').lower()
    segments = [segment for segment in parsed.path.split('/') if segment]
    if host in ('youtu.be', 'www.youtu.be') and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
        return segments[1]

    # Unknown shape: keep the previous best-effort extraction.
    return url.split('v=')[-1].split('&')[0]


def get_video_transcript(video_url):
    """Fetch the Japanese transcript of a video as one string.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript entries' text joined with single spaces, or ``None``
        when the transcript could not be retrieved (the error is printed).
    """
    video_id = _extract_video_id(video_url)
    try:
        # NOTE(review): newer youtube-transcript-api releases appear to
        # replace the static get_transcript() with an instance fetch()
        # method -- confirm against the installed version.
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id, languages=['ja']
        )
    except Exception as e:
        # Deliberately broad: one unavailable transcript or network error
        # must not abort the whole batch run in main().
        print(f"Error retrieving transcript: {e}")
        return None
    return ' '.join(entry['text'] for entry in transcript)


def main(input_path='video_list.csv', output_path='video_transcripts.csv'):
    """Read (title, url) rows from a CSV and write their transcripts to a CSV.

    Args:
        input_path: CSV file with a header row, then the title in the first
            column and the video URL in the second.
        output_path: CSV file to create with the columns
            ``title``, ``url`` and ``transcript``.
    """
    video_transcripts = []
    with open(input_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header; tolerate an empty file
        for row in reader:
            if len(row) < 2:
                # Blank or truncated line: there is no URL to look up.
                continue
            title, video_url = row[0], row[1]
            print(f"Title: {title}")
            video_transcripts.append({
                'title': title,
                'url': video_url,
                'transcript': get_video_transcript(video_url),
            })

    # Write only after every row has been processed; a failed lookup has
    # transcript None, which csv.writer emits as an empty field.
    with open(output_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['title', 'url', 'transcript'])
        for video_transcript in video_transcripts:
            writer.writerow([
                video_transcript['title'],
                video_transcript['url'],
                video_transcript['transcript'],
            ])


if __name__ == "__main__":
    main()