Merge pull request #7599 from lalinsky/fix-youtube

[youtube] More explicit player config JSON extraction (fixes #7468)
This commit is contained in:
Sergey M 2015-11-23 20:52:23 +06:00
commit 5ae17037a3
1 changed file with 30 additions and 8 deletions

View File

@ -691,7 +691,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
{ {
'url': 'http://vid.plus/FlRa-iH7PGw', 'url': 'http://vid.plus/FlRa-iH7PGw',
'only_matching': True, 'only_matching': True,
} },
{
# Title with JS-like syntax "};"
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf',
},
'params': {
'skip_download': True,
},
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -875,16 +891,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return {} return {}
return sub_lang_list return sub_lang_list
def _get_ytplayer_config(self, webpage):
    """Extract and decode the ``ytplayer.config`` JSON object from a page.

    webpage is the page text to search.

    Returns the decoded JSON value, or None when no pattern matches or
    when none of the captured candidates is valid JSON.
    """
    # The config is located with plain regexes, so text embedded in the
    # JSON (e.g. a title containing "};") can cut a capture short or let
    # it run too far. The pattern anchored on a following ";ytplayer" is
    # tried first; the looser one is the fallback.
    patterns = (
        r';ytplayer\.config\s*=\s*({.*?});ytplayer',
        r';ytplayer\.config\s*=\s*({.*?});',
    )
    for pattern in patterns:
        # Passed as a one-element list to keep the call shape this method
        # has always used with _search_regex.
        config = self._search_regex(
            [pattern], webpage, 'ytconfig.player', default=None)
        if config is None:
            continue
        escaped = uppercase_escape(config)
        try:
            return json.loads(escaped)
        except ValueError:
            # The capture was not a complete JSON document. Callers only
            # check for None, so try the next pattern instead of letting
            # the parse error abort extraction.
            continue
    return None
def _get_automatic_captions(self, video_id, webpage): def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an """We need the webpage for getting the captions url, pass it as an
argument to speed up the process.""" argument to speed up the process."""
self.to_screen('%s: Looking for automatic captions' % video_id) self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage) player_config = self._get_ytplayer_config(webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id err_msg = 'Couldn\'t find automatic captions for %s' % video_id
if mobj is None: if player_config is None:
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
player_config = json.loads(mobj.group(1))
try: try:
args = player_config['args'] args = player_config['args']
caption_url = args['ttsurl'] caption_url = args['ttsurl']
@ -1091,10 +1115,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False age_gate = False
video_info = None video_info = None
# Try looking directly into the video webpage # Try looking directly into the video webpage
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) ytplayer_config = self._get_ytplayer_config(video_webpage)
if mobj: if ytplayer_config is not None:
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args'] args = ytplayer_config['args']
if args.get('url_encoded_fmt_stream_map'): if args.get('url_encoded_fmt_stream_map'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs