From 8963d9c2661b7de8832b7afcf1cdbc197275d8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 9 Sep 2013 10:33:12 +0200 Subject: [PATCH] [youtube] Modify the regex to match ids of length 11 (fixes #1396) In urls like http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930 you can't split the query string and ids always have that length. --- test/test_all_urls.py | 11 +++++++---- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 5d8d93e0e..99fc7bd28 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase): self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) def test_youtube_extract(self): - self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') - self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') - self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') - self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc') + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') def test_no_duplicates(self): ies = gen_extractors() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bad15cb44..6a8351293 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -150,7 +150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |youtu\.be/ # just youtu.be/xxxx ) )? # all until now is optional -> you can pass the naked ID - ([0-9A-Za-z_-]+) # here is it! the YouTube video ID + ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID (?(1).+)? # if we found the ID, everything can follow $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'