youtube_dl/extractor/rtve.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import base64
   5 import re
   6 import time
   7
   8 from .common import InfoExtractor
   9 from ..compat import compat_urlparse
  10 from ..utils import (
  11     struct_unpack,
  12     remove_end,
  13 )
  14
  15
  16 def _decrypt_url(png):
  17     encrypted_data = base64.b64decode(png)
  18     text_index = encrypted_data.find(b'tEXt')
  19     text_chunk = encrypted_data[text_index - 4:]
  20     length = struct_unpack('!I', text_chunk[:4])[0]
  21     # Use bytearray to get integers when iterating in both python 2.x and 3.x
  22     data = bytearray(text_chunk[8:8 + length])
  23     data = [chr(b) for b in data if b != 0]
  24     hash_index = data.index('#')
  25     alphabet_data = data[:hash_index]
  26     url_data = data[hash_index + 1:]
  27
  28     alphabet = []
  29     e = 0
  30     d = 0
  31     for l in alphabet_data:
  32         if d == 0:
  33             alphabet.append(l)
  34             d = e = (e + 1) % 4
  35         else:
  36             d -= 1
  37     url = ''
  38     f = 0
  39     e = 3
  40     b = 1
  41     for letter in url_data:
  42         if f == 0:
  43             l = int(letter) * 10
  44             f = 1
  45         else:
  46             if e == 0:
  47                 l += int(letter)
  48                 url += alphabet[l]
  49                 e = (b + 3) % 4
  50                 f = 0
  51                 b += 1
  52             else:
  53                 e -= 1
  54
  55     return url
  56
  57
  58 class RTVEALaCartaIE(InfoExtractor):
  59     IE_NAME = 'rtve.es:alacarta'
  60     IE_DESC = 'RTVE a la carta'
  61     _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
  62
  63     _TESTS = [{
  64         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
  65         'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
  66         'info_dict': {
  67             'id': '2491869',
  68             'ext': 'mp4',
  69             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
  70         },
  71     }, {
  72         'note': 'Live stream',
  73         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
  74         'info_dict': {
  75             'id': '1694255',
  76             'ext': 'flv',
  77             'title': 'TODO',
  78         },
  79         'skip': 'The f4m manifest can\'t be used yet',
  80     }, {
  81         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
  82         'only_matching': True,
  83     }]
  84
  85     def _real_extract(self, url):
  86         mobj = re.match(self._VALID_URL, url)
  87         video_id = mobj.group('id')
  88         info = self._download_json(
  89             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
  90             video_id)['page']['items'][0]
  91         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
  92         png = self._download_webpage(png_url, video_id, 'Downloading url information')
  93         video_url = _decrypt_url(png)
  94         if not video_url.endswith('.f4m'):
  95             auth_url = video_url.replace(
  96                 'resources/', 'auth/resources/'
  97             ).replace('.net.rtve', '.multimedia.cdn.rtve')
  98             video_path = self._download_webpage(
  99                 auth_url, video_id, 'Getting video url')
 100             # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
 101             # the right Content-Length header and the mp4 format
 102             video_url = compat_urlparse.urljoin(
 103                 'http://mvod1.akcdn.rtve.es/', video_path)
 104
 105         subtitles = None
 106         if info.get('sbtFile') is not None:
 107             subtitles = self.extract_subtitles(video_id, info['sbtFile'])
 108
 109         return {
 110             'id': video_id,
 111             'title': info['title'],
 112             'url': video_url,
 113             'thumbnail': info.get('image'),
 114             'page_url': url,
 115             'subtitles': subtitles,
 116         }
 117
 118     def _get_subtitles(self, video_id, sub_file):
 119         subs = self._download_json(
 120             sub_file + '.json', video_id,
 121             'Downloading subtitles info')['page']['items']
 122         return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
 123             for s in subs)
 124
 125
 126 class RTVELiveIE(InfoExtractor):
 127     IE_NAME = 'rtve.es:live'
 128     IE_DESC = 'RTVE.es live streams'
 129     _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
 130
 131     _TESTS = [{
 132         'url': 'http://www.rtve.es/noticias/directo-la-1/',
 133         'info_dict': {
 134             'id': 'directo-la-1',
 135             'ext': 'flv',
 136             'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
 137         },
 138         'params': {
 139             'skip_download': 'live stream',
 140         }
 141     }]
 142
 143     def _real_extract(self, url):
 144         mobj = re.match(self._VALID_URL, url)
 145         start_time = time.gmtime()
 146         video_id = mobj.group('id')
 147
 148         webpage = self._download_webpage(url, video_id)
 149         player_url = self._search_regex(
 150             r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
 151         title = remove_end(self._og_search_title(webpage), ' en directo')
 152         title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
 153
 154         vidplayer_id = self._search_regex(
 155             r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
 156         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
 157         png = self._download_webpage(png_url, video_id, 'Downloading url information')
 158         video_url = _decrypt_url(png)
 159
 160         return {
 161             'id': video_id,
 162             'ext': 'flv',
 163             'title': title,
 164             'url': video_url,
 165             'app': 'rtve-live-live?ovpfv=2.1.2',
 166             'player_url': player_url,
 167             'rtmp_live': True,
 168         }