From 42362fdb5e780611b7054e52eb28621f5a9fd7ba Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 15:49:17 +0100 Subject: [PATCH] [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) --- youtube_dl/extractor/aenetworks.py | 181 +++++++++++++++++++++-------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 135 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 1bbfe2641..cbde8b46e 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -7,18 +7,118 @@ from ..utils import ( smuggle_url, update_url_query, unescapeHTML, + extract_attributes, +) +from ..compat import ( + compat_urlparse, ) -class AENetworksIE(InfoExtractor): +class AENetworksBaseIE(InfoExtractor): + def theplatform_url_result(self, theplatform_url, video_id, query): + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': smuggle_url( + update_url_query(theplatform_url, query), + { + 'sig': { + 'key': 'crazyjava', + 'secret': 's3cr3t' + }, + 'force_smil_url': True + }), + 'ie_key': 'ThePlatform', + } + + +class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' - _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P[^/]+)/(?:[^/]+/)+(?P[^/]+?)(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/shows/(?P[^/]+(?:/[^/]+){0,2})' + _TESTS = [{ + 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', + 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', + 'info_dict': { + 'id': '22253814', + 'ext': 'mp4', + 'title': 'Winter Is Coming', + 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', + 'timestamp': 1338306241, + 'upload_date': '20120529', + 'uploader': 'AENE-NEW', + }, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'http://www.history.com/shows/ancient-aliens/season-1', + 'info_dict': { + 'id': '71889446852', + }, + 'playlist_mincount': 5, + }, { + 'url': 'http://www.mylifetime.com/shows/atlanta-plastic', + 'info_dict': { + 'id': 'SERIES4317', + 'title': 'Atlanta Plastic', + }, + 'playlist_mincount': 2, + }, { + 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', + 'only_matching': True + }, { + 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', + 'only_matching': True + }, { + 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', + 'only_matching': True + }] + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + url_parts = display_id.split('/') + url_parts_len = len(url_parts) + if url_parts_len == 1: + entries = [] + for season_url_path in re.findall(r'(?s)]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeriesId', webpage), + self._html_search_meta('aetn:SeriesTitle', webpage)) + elif url_parts_len == 2: + entries = [] + for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + episode_attributes = extract_attributes(episode_item) + episode_url = compat_urlparse.urljoin( + url, episode_attributes['data-canonical']) + entries.append(self.url_result( + episode_url, 'AENetworks', + episode_attributes['data-videoid'])) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeasonId', webpage)) + else: + video_id = self._html_search_meta('aetn:VideoID', webpage) + media_url = self._search_regex( + r"media_url\s*=\s*'([^']+)'", webpage, 'video url') + + info = self._search_json_ld(webpage, video_id, fatal=False) + info.update(self.theplatform_url_result( + media_url, video_id, { + 'mbr': 'true', + 'assetTypes': 'medium_video_s3' + })) + return info + + +class HistoryTopicIE(AENetworksBaseIE): + IE_NAME = 'history:topic' + IE_DESC = 'History.com Topic' + _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P[^/]+)/videos(?:/(?P[^/?#]+))?' _TESTS = [{ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', 'info_dict': { - 'id': 'g12m5Gyt3fdR', + 'id': '40700995724', 'ext': 'mp4', 'title': "Bet You Didn't Know: Valentine's Day", 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', @@ -31,57 +131,38 @@ class AENetworksIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['ThePlatform'], - 'expected_warnings': ['JSON-LD'], }, { - 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', - 'info_dict': { - 'id': 'eg47EERs_JsZ', - 'ext': 'mp4', - 'title': 'Winter Is Coming', - 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', - 'timestamp': 1338306241, - 'upload_date': '20120529', - 'uploader': 'AENE-NEW', + 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos', + 'info_dict': + { + 'id': 'world-war-i-history', }, - 'add_ie': ['ThePlatform'], + 'playlist_mincount': 24, }, { - 'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', - 'only_matching': True - }, { - 'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage', - 'only_matching': True - }, { - 'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients', - 'only_matching': True + 'url': 'http://www.history.com/topics/world-war-i-history/videos', + 'only_matching': True, }] def _real_extract(self, url): - page_type, video_id = re.match(self._VALID_URL, url).groups() + topic_id, display_id = re.match(self._VALID_URL, url).groups() + if display_id: + webpage = self._download_webpage(url, display_id) + release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() + release_url = unescapeHTML(release_url) - webpage = self._download_webpage(url, video_id) - - video_url_re = [ - r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, - r"media_url\s*=\s*'([^']+)'" - ] - video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url')) - query = {'mbr': 'true'} - if page_type == 'shows': - query['assetTypes'] = 'medium_video_s3' - if 'switch=hds' in video_url: - query['switch'] = 'hls' - - info = self._search_json_ld(webpage, video_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'url': smuggle_url( - update_url_query(video_url, query), - { - 'sig': { - 'key': 'crazyjava', - 'secret': 's3cr3t'}, - 'force_smil_url': True - }), - }) - return info + return self.theplatform_url_result( + release_url, video_id, { + 'mbr': 'true', + 'switch': 'hls' + }) + else: + webpage = self._download_webpage(url, topic_id) + entries = [] + for episode_item in re.findall(r']*>', webpage): + video_attributes = extract_attributes(episode_item) + entries.append(self.theplatform_url_result( + video_attributes['data-href'], video_attributes['data-id'], { + 'mbr': 'true', + 'switch': 'hls' + })) + return self.playlist_result(entries, topic_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2f9ee1596..bba88e9eb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -20,7 +20,10 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .aenetworks import AENetworksIE +from .aenetworks import ( + AENetworksIE, + HistoryTopicIE, +) from .afreecatv import AfreecaTVIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE