From 7151f63a5f3820a322ba8bf61eebe8d9f75d6ee5 Mon Sep 17 00:00:00 2001 From: cryzed Date: Fri, 9 Mar 2012 22:05:35 +0100 Subject: [PATCH] Fixed downloading of unrelated videos when downloading a YouTube playlist --- youtube-dl | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/youtube-dl b/youtube-dl index 789724041..08acbc4ae 100755 --- a/youtube-dl +++ b/youtube-dl @@ -766,7 +766,7 @@ class FileDownloader(object): raise MaxDownloadsReached() filename = self.prepare_filename(info_dict) - + # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -842,7 +842,7 @@ class FileDownloader(object): except (ContentTooShortError, ), err: self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - + if success: try: self.post_process(filename, info_dict) @@ -1183,7 +1183,7 @@ class YoutubeIE(InfoExtractor): '43': '360x640', '44': '480x854', '45': '720x1280', - } + } IE_NAME = u'youtube' def report_lang(self): @@ -2519,7 +2519,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' - _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' + _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=PL%s&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None IE_NAME = u'youtube:playlist' @@ -2571,7 +2571,8 @@ class YoutubePlaylistIE(InfoExtractor): # Extract video identifiers ids_in_page = [] - for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_indicator = self._VIDEO_INDICATOR_TEMPLATE % playlist_id + for mobj in re.finditer(video_indicator, page): if mobj.group(1) not in ids_in_page: ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) @@ -2582,7 +2583,11 @@ class YoutubePlaylistIE(InfoExtractor): playliststart = self._downloader.params.get('playliststart', 1) - 1 playlistend = self._downloader.params.get('playlistend', -1) - video_ids = video_ids[playliststart:playlistend] + + if playlistend == -1: + video_ids = video_ids[playliststart:] + else: + video_ids = video_ids[playliststart:playlistend] for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) @@ -3028,14 +3033,14 @@ class BlipTVIE(InfoExtractor): data = json_data['Post'] else: data = json_data - + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] umobj = re.match(self._URL_EXT, video_url) if umobj is None: raise ValueError('Can not determine filename extension') ext = umobj.group(1) - + info = { 'id': data['item_id'], 'url': video_url, @@ -3069,7 +3074,7 @@ class MyVideoIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - + def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) @@ -3136,7 +3141,7 @@ class ComedyCentralIE(InfoExtractor): def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) - + def report_config_download(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) @@ -3563,7 +3568,7 @@ class SoundcloudIE(InfoExtractor): mobj = re.search('track-description-value">

(.*?)

', webpage) if mobj: description = mobj.group(1) - + # upload date upload_date = None mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)", webpage) @@ -3698,7 +3703,7 @@ class MixcloudIE(InfoExtractor): url_list = jsonData[fmt][bitrate] except TypeError: # we have no bitrate info. url_list = jsonData[fmt] - + return url_list def check_urls(self, url_list): @@ -3818,7 +3823,7 @@ class StanfordOpenClassroomIE(InfoExtractor): info = { 'id': _simplify_title(course + '_' + video), } - + self.report_extraction(info['id']) baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' xmlUrl = baseUrl + video + '.xml' @@ -3952,7 +3957,7 @@ class MTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract performer') return performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1')) - video_title = performer + ' - ' + song_name + video_title = performer + ' - ' + song_name mobj = re.search(r'', webpage) if mobj is None: @@ -4194,7 +4199,7 @@ def updateSelf(downloader, filename): try: urlh = urllib.urlopen(UPDATE_URL) newcontent = urlh.read() - + vmatch = re.search("__version__ = '([^']+)'", newcontent) if vmatch is not None and vmatch.group(1) == __version__: downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')') @@ -4615,7 +4620,7 @@ def _real_main(): parser.error(u'you must provide at least one URL') else: sys.exit() - + try: retcode = fd.download(all_urls) except MaxDownloadsReached: