youtube_dl/extractor/videomore.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     parse_age_limit,
  10     parse_iso8601,
  11     xpath_text,
  12 )
  13
  14
  15 class VideomoreIE(InfoExtractor):
  16     IE_NAME = 'videomore'
  17     _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)'
  18     _TESTS = [{
  19         'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
  20         'md5': '70875fbf57a1cd004709920381587185',
  21         'info_dict': {
  22             'id': '367617',
  23             'ext': 'flv',
  24             'title': 'В гостях Алексей Чумаков и Юлия Ковальчук',
  25             'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.',
  26             'thumbnail': 're:^https?://.*\.jpg',
  27             'duration': 2910,
  28             'age_limit': 16,
  29             'view_count': int,
  30         },
  31     }, {
  32         'url': 'http://videomore.ru/elki_3?track_id=364623',
  33         'only_matching': True,
  34     }, {
  35         'url': 'http://videomore.ru/embed/364623',
  36         'only_matching': True,
  37     }, {
  38         'url': 'http://videomore.ru/video/tracks/364623.xml',
  39         'only_matching': True,
  40     }, {
  41         'url': 'http://videomore.ru/video/tracks/364623.json',
  42         'only_matching': True,
  43     }, {
  44         'url': 'http://videomore.ru/video/tracks/158031/quotes/33248',
  45         'only_matching': True,
  46     }, {
  47         'url': 'videomore:367617',
  48         'only_matching': True,
  49     }]
  50
  51     @staticmethod
  52     def _extract_url(webpage):
  53         mobj = re.search(
  54             r'<object[^>]+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
  55             webpage)
  56         if mobj:
  57             return mobj.group('url')
  58
  59     def _real_extract(self, url):
  60         mobj = re.match(self._VALID_URL, url)
  61         video_id = mobj.group('sid') or mobj.group('id')
  62
  63         video = self._download_xml(
  64             'http://videomore.ru/video/tracks/%s.xml' % video_id,
  65             video_id, 'Downloading video XML')
  66
  67         video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
  68         formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
  69
  70         data = self._download_json(
  71             'http://videomore.ru/video/tracks/%s.json' % video_id,
  72             video_id, 'Downloadinng video JSON')
  73
  74         title = data.get('title') or data['project_title']
  75         description = data.get('description') or data.get('description_raw')
  76         timestamp = parse_iso8601(data.get('published_at'))
  77         duration = int_or_none(data.get('duration'))
  78         view_count = int_or_none(data.get('views'))
  79         age_limit = parse_age_limit(data.get('min_age'))
  80         thumbnails = [{
  81             'url': thumbnail,
  82         } for thumbnail in data.get('big_thumbnail_urls', [])]
  83
  84         return {
  85             'id': video_id,
  86             'title': title,
  87             'description': description,
  88             'thumbnails': thumbnails,
  89             'timestamp': timestamp,
  90             'duration': duration,
  91             'view_count': view_count,
  92             'age_limit': age_limit,
  93             'formats': formats,
  94         }
  95
  96
  97 class VideomoreVideoIE(InfoExtractor):
  98     IE_NAME = 'videomore:video'
  99     _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$'
 100     _TESTS = [{
 101         # single video with og:video:iframe
 102         'url': 'http://videomore.ru/elki_3',
 103         'info_dict': {
 104             'id': '364623',
 105             'ext': 'flv',
 106             'title': 'Ёлки 3',
 107             'description': '',
 108             'thumbnail': 're:^https?://.*\.jpg',
 109             'duration': 5579,
 110             'age_limit': 6,
 111             'view_count': int,
 112         },
 113         'params': {
 114             'skip_download': True,
 115         },
 116     }, {
 117         # season single serie with og:video:iframe
 118         'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
 119         'only_matching': True,
 120     }, {
 121         'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
 122         'only_matching': True,
 123     }, {
 124         # single video without og:video:iframe
 125         'url': 'http://videomore.ru/marin_i_ego_druzya',
 126         'info_dict': {
 127             'id': '359073',
 128             'ext': 'flv',
 129             'title': '1 серия. Здравствуй, Аквавилль!',
 130             'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7',
 131             'thumbnail': 're:^https?://.*\.jpg',
 132             'duration': 754,
 133             'age_limit': 6,
 134             'view_count': int,
 135         },
 136         'params': {
 137             'skip_download': True,
 138         },
 139     }]
 140
 141     @classmethod
 142     def suitable(cls, url):
 143         return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url)
 144
 145     def _real_extract(self, url):
 146         display_id = self._match_id(url)
 147
 148         webpage = self._download_webpage(url, display_id)
 149
 150         video_url = self._og_search_property(
 151             'video:iframe', webpage, 'video url', default=None)
 152
 153         if not video_url:
 154             video_id = self._search_regex(
 155                 (r'config\s*:\s*["\']https?://videomore\.ru/video/tracks/(\d+)\.xml',
 156                  r'track-id=["\'](\d+)',
 157                  r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
 158             video_url = 'videomore:%s' % video_id
 159
 160         return self.url_result(video_url, VideomoreIE.ie_key())
 161
 162
 163 class VideomoreSeasonIE(InfoExtractor):
 164     IE_NAME = 'videomore:season'
 165     _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$'
 166     _TESTS = [{
 167         'url': 'http://videomore.ru/molodezhka/sezon_promo',
 168         'info_dict': {
 169             'id': 'molodezhka/sezon_promo',
 170             'title': 'Молодежка Промо',
 171         },
 172         'playlist_mincount': 12,
 173     }]
 174
 175     def _real_extract(self, url):
 176         display_id = self._match_id(url)
 177
 178         webpage = self._download_webpage(url, display_id)
 179
 180         title = self._og_search_title(webpage)
 181
 182         entries = [
 183             self.url_result(item) for item in re.findall(
 184                 r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
 185                 % display_id, webpage)]
 186
 187         return self.playlist_result(entries, display_id, title)