From 725652e9247e1171110b624d748e20fa1c88260e Mon Sep 17 00:00:00 2001
From: Mister Hat
Date: Sat, 16 May 2015 19:50:58 -0500
Subject: [PATCH 1/2] [karrierevideos] add support for www.karrierevideos.at (closes #5354)

---
NOTE(review): this patch text was recovered from a whitespace-collapsed,
HTML-stripped copy. The `<id>` group name in _VALID_URL is required by
_match_id() and was restored. The tags inside the description regex
(`<div class="leadtext">`, `<p>`, `</p>`) are a best-effort reconstruction;
TODO confirm against the upstream commit before applying.

 youtube_dl/extractor/__init__.py       |  1 +
 youtube_dl/extractor/karrierevideos.py | 52 ++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 youtube_dl/extractor/karrierevideos.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fb4f63ca3..d131d3ec3 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -244,6 +244,7 @@ from .kaltura import KalturaIE
 from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py
new file mode 100644
index 000000000..59d29e845
--- /dev/null
+++ b/youtube_dl/extractor/karrierevideos.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class KarriereVideosIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?karrierevideos\.at/berufsvideos/([a-z-]+)/(?P<id>[a-z-]+)'
+    _TEST = {
+        'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
+        'info_dict': {
+            'id': 'altenpflegerin',
+            'ext': 'mp4',
+            'title': 'AltenpflegerIn',
+            'thumbnail': 're:^http://.*\.png\?v=[0-9]+',
+            'description': 'md5:dbadd1259fde2159a9b28667cb664ae2'
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        description = self._html_search_regex(
+            r'<div class="leadtext">\n{0,}?\s{0,}<p>(.*?)</p>',
+            webpage, 'description')
+
+        playlist = self._html_search_regex(r'/config/video/(.*?)\.xml', webpage, 'playlist')
+        playlist = self._download_xml(
+            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % playlist,
+            video_id)
+
+        namespace = {
+            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
+        }
+
+        item = playlist.find('tracklist/item')
+        streamer = item.find('jwplayer:streamer', namespace).text
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta('title', webpage),
+            'description': description,
+            'thumbnail': 'http://www.karrierevideos.at' + self._html_search_meta('thumbnail', webpage),
+            'protocol': 'rtmp',
+            'url': streamer.replace('rtmpt', 'http'),
+            'play_path': 'mp4:' + item.find('jwplayer:file', namespace).text,
+            'tc_url': streamer,
+            'ext': 'mp4'
+        }

From ba9d16291b8ace3bd412bcfc0c128c047545e509 Mon Sep 17 00:00:00 2001
From: Mister Hat
Date: Sun, 17 May 2015 03:35:08 -0500
Subject: [PATCH 2/2] manually specify namespace

---
 youtube_dl/extractor/karrierevideos.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py
index 59d29e845..a05e8ab76 100644
--- a/youtube_dl/extractor/karrierevideos.py
+++ b/youtube_dl/extractor/karrierevideos.py
@@ -29,15 +29,13 @@ class KarriereVideosIE(InfoExtractor):
 
         playlist = self._html_search_regex(r'/config/video/(.*?)\.xml', webpage, 'playlist')
         playlist = self._download_xml(
-            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % playlist,
+            'http://www.karrierevideos.at/player-playlist.xml.php?p=' + playlist,
             video_id)
 
-        namespace = {
-            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
-        }
+        namespace = 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
 
         item = playlist.find('tracklist/item')
-        streamer = item.find('jwplayer:streamer', namespace).text
+        streamer = item.find('{%s}streamer' % namespace).text
 
         return {
             'id': video_id,
@@ -46,7 +44,7 @@ class KarriereVideosIE(InfoExtractor):
             'thumbnail': 'http://www.karrierevideos.at' + self._html_search_meta('thumbnail', webpage),
             'protocol': 'rtmp',
             'url': streamer.replace('rtmpt', 'http'),
-            'play_path': 'mp4:' + item.find('jwplayer:file', namespace).text,
+            'play_path': 'mp4:' + item.find('{%s}file' % namespace).text,
             'tc_url': streamer,
             'ext': 'mp4'
         }