From 05fe2594e4589b4e714a423550172eeec3949a70 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 20 Aug 2015 01:38:39 +0800 Subject: [PATCH] [theplatform] Support URLs with 'guid=' --- youtube_dl/extractor/theplatform.py | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index f02e0f58d..883bf491c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,6 +9,10 @@ import hashlib from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( determine_ext, ExtractorError, @@ -120,6 +124,20 @@ class ThePlatformIE(ThePlatformBaseIE): }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'only_matching': True, + }, { + 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701', + 'md5': '734f3790fb5fc4903da391beeebc4836', + 'info_dict': { + 'id': 'tdy_or_siri_150701', + 'ext': 'mp4', + 'title': 'iPhone Siri’s sassy response to a math question has people talking', + 'description': 'md5:a565d1deadd5086f3331d57298ec6333', + 'duration': 83.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1435752600, + 'upload_date': '20150701', + 'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"], + }, }] @staticmethod @@ -154,6 +172,24 @@ class ThePlatformIE(ThePlatformBaseIE): path += '/media' path += '/' + video_id + qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + if 'guid' in qs_dict: + webpage = self._download_webpage(url, video_id) + scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage) + feed_id = None + # The feed id is usually located in the last script.
+ # There seems to be no naming pattern for the script of interest, so + # try each script in turn + for script in reversed(scripts): + feed_script = self._download_webpage(script, video_id, 'Downloading feed script') + feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None) + if feed_id is not None: + break + if feed_id is None: + raise ExtractorError('Unable to find feed id') + return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % ( + provider_id, feed_id, qs_dict['guid'][0])) + if smuggled_data.get('force_smil_url', False): smil_url = url elif mobj.group('config'):