[Improvements] Upgrade langchain-openai package and other improvements (#1372)

This commit is contained in:
Deshraj Yadav
2024-05-21 23:42:50 -07:00
committed by GitHub
parent 9ba445e623
commit 9544c69c55
9 changed files with 86 additions and 232 deletions

View File

@@ -1,12 +1,11 @@
import hashlib
import json
import logging
try:
from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
raise ImportError(
'YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api "`'
)
raise ImportError('YouTube video requires extra dependencies. Install with `pip install youtube-transcript-api "`')
try:
from langchain_community.document_loaders import YoutubeLoader
except ImportError:
@@ -32,17 +31,15 @@ class YoutubeVideoLoader(BaseLoader):
metadata = doc[0].metadata
metadata["url"] = url
video_id = url.split("v=")[1].split('&')[0]
video_id = url.split("v=")[1].split("&")[0]
try:
# Fetching transcript data
transcript = YouTubeTranscriptApi.get_transcript(video_id,languages=['en'])
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
# convert transcript to JSON, escaping non-ASCII characters, to avoid raw unicode symbols
metadata["transcript"] = json.dumps(transcript, ensure_ascii=True)
except Exception as e:
except Exception:
logging.exception(f"Failed to fetch transcript for video {url}")
metadata["transcript"] = "Unavailable"
metadata["transcript"] = "Unavailable"
output.append(
{