From 96d315c2be1c00339b4cb77e80b3d5f63770a735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Jan 2017 05:32:18 +0700 Subject: [PATCH] [pornhub:playlist] Improve extraction (closes #11594) --- youtube_dl/extractor/pornhub.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 40dbe6967..3eaf56973 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - entries = self._extract_entries(webpage) + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/rg3/youtube-dl/issues/11594). + container = self._search_regex( + r'(?s)(]+class=["\']container.+)', webpage, + 'container', default=webpage) + + entries = self._extract_entries(container) playlist = self._parse_json( self._search_regex( @@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P\d+)' _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/6201671', + 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { - 'id': '6201671', - 'title': 'P0p4', + 'id': '4667351', + 'title': 'Nataly Hot', }, - 'playlist_mincount': 35, + 'playlist_mincount': 2, }]