[tunein] Add new extractor (Closes #4097)

This commit is contained in:
Naglis Jonaitis 2014-11-24 23:15:33 +02:00
parent 00e9d396ab
commit 2c25a2bd29
2 changed files with 102 additions and 0 deletions

View File

@ -405,6 +405,7 @@ from .trutube import TruTubeIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tudou import TudouIE from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE from .tutv import TutvIE
from .tvigle import TvigleIE from .tvigle import TvigleIE

View File

@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TuneInIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?:
tunein\.com/
(?:
radio/.*?-s|
station/.*?StationId\=
)(?P<id>[0-9]+)
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
)
'''
_INFO_DICT = {
'id': '34682',
'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
'ext': 'AAC',
'thumbnail': 're:^https?://.*\.png$',
'location': 'Tacoma, WA',
}
_TESTS = [
{
'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
{ # test redirection
'url': 'http://tun.in/ser7s',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
redirect_id = mobj.group('redirect_id')
if redirect_id:
# The server doesn't support HEAD requests
urlh = self._request_webpage(
url, redirect_id, note='Downloading redirect page')
url = urlh.geturl()
self.to_screen('Following redirect: %s' % url)
mobj = re.match(self._VALID_URL, url)
station_id = mobj.group('id')
webpage = self._download_webpage(
url, station_id, note='Downloading station webpage')
payload = self._html_search_regex(
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
json_data = json.loads(payload)
station_info = json_data['Station']['broadcast']
title = station_info['Title']
thumbnail = station_info.get('Logo')
location = station_info.get('Location')
streams_url = station_info.get('StreamUrl')
if not streams_url:
raise ExtractorError('No downloadable streams found',
expected=True)
stream_data = self._download_webpage(
streams_url, station_id, note='Downloading stream data')
streams = json.loads(self._search_regex(
r'\((.*)\);', stream_data, 'stream info'))['Streams']
is_live = None
formats = []
for stream in streams:
if stream.get('Type') == 'Live':
is_live = True
formats.append({
'abr': stream.get('Bandwidth'),
'ext': stream.get('MediaType'),
'acodec': stream.get('MediaType'),
'vcodec': 'none',
'url': stream.get('Url'),
# Sometimes streams with the highest quality do not exist
'preference': stream.get('Reliability'),
})
self._sort_formats(formats)
return {
'id': station_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'location': location,
'is_live': is_live,
}