[generic] Prevent downloading a .swf as a video

We're seeing quite a number of sites that put the player URL, rather than a video file, in the og:video field. Try to detect some of these and filter them out.
This commit is contained in:
Philipp Hagemeister 2014-08-24 02:24:47 +02:00
parent 6857590059
commit fa8deaf38b
1 changed files with 6 additions and 1 deletions

View File

@ -831,7 +831,12 @@ class GenericIE(InfoExtractor):
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player: # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None: if m_video_type is not None:
found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) def check_video(vurl):
vpath = compat_urlparse.urlparse(vurl).path
return not vpath.endswith('.swf')
found = list(filter(
check_video,
re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
if not found: if not found:
# HTML5 video # HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage) found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)