From fcea44c6d516564b40d55f4989bb7710a0edf21d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 6 Jan 2014 17:31:47 +0100 Subject: [PATCH] [vimeo] Add support for review pages Since the regexp is already overboarding and review pages have a distinct URL format (with non-trivial stuff after the ID), use a dedicated IE. Fixes #2106 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vimeo.py | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 21d564dba..f1167989e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -199,6 +199,7 @@ from .vimeo import ( VimeoUserIE, VimeoAlbumIE, VimeoGroupsIE, + VimeoReviewIE, ) from .vine import VineIE from .viki import VikiIE diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c3623fcbe..05e1aa1f2 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -311,7 +311,7 @@ class VimeoChannelIE(InfoExtractor): class VimeoUserIE(VimeoChannelIE): IE_NAME = u'vimeo:user' - _VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)(?:[#?]|$)' _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>' @classmethod @@ -336,7 +336,7 @@ class VimeoAlbumIE(VimeoChannelIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - album_id = mobj.group('id') + album_id = mobj.group('id') return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) @@ -351,3 +351,24 @@ class VimeoGroupsIE(VimeoAlbumIE): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name) + + +class VimeoReviewIE(InfoExtractor): + IE_NAME = u'vimeo:review' + IE_DESC = u'Review pages on vimeo' + _VALID_URL = r'(?:https?://)?vimeo.\com/[^/]+/review/(?P<id>[^/]+)' + _TEST = { + 'url': 
'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', + 'file': '75524534.mp4', + 'md5': 'c507a72f780cacc12b2248bb4006d253', + 'info_dict': { + 'title': "DICK HARDWICK 'Comedian'", + 'uploader': 'Richard Hardwick', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + player_url = 'https://player.vimeo.com/player/' + video_id + return self.url_result(player_url, 'Vimeo', video_id)