youtube_dl/extractor/twentymin.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     try_get,
  10 )
  11
  12
  13 class TwentyMinutenIE(InfoExtractor):
  14     IE_NAME = '20min'
  15     _VALID_URL = r'''(?x)
  16                     https?://
  17                         (?:www\.)?20min\.ch/
  18                         (?:
  19                             videotv/*\?.*?\bvid=|
  20                             videoplayer/videoplayer\.html\?.*?\bvideoId@
  21                         )
  22                         (?P<id>\d+)
  23                     '''
  24     _TESTS = [{
  25         'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
  26         'md5': 'e7264320db31eed8c38364150c12496e',
  27         'info_dict': {
  28             'id': '469148',
  29             'ext': 'mp4',
  30             'title': '85 000 Franken für 15 perfekte Minuten',
  31             'thumbnail': r're:https?://.*\.jpg$',
  32         },
  33     }, {
  34         'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
  35         'info_dict': {
  36             'id': '523629',
  37             'ext': 'mp4',
  38             'title': 'So kommen Sie bei Eis und Schnee sicher an',
  39             'description': 'md5:117c212f64b25e3d95747e5276863f7d',
  40             'thumbnail': r're:https?://.*\.jpg$',
  41         },
  42         'params': {
  43             'skip_download': True,
  44         },
  45     }, {
  46         'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
  47         'only_matching': True,
  48     }]
  49
  50     @staticmethod
  51     def _extract_urls(webpage):
  52         return [m.group('url') for m in re.finditer(
  53             r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
  54             webpage)]
  55
  56     def _real_extract(self, url):
  57         video_id = self._match_id(url)
  58
  59         video = self._download_json(
  60             'http://api.20min.ch/video/%s/show' % video_id,
  61             video_id)['content']
  62
  63         title = video['title']
  64
  65         formats = [{
  66             'format_id': format_id,
  67             'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
  68             'quality': quality,
  69         } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
  70         self._sort_formats(formats)
  71
  72         description = video.get('lead')
  73         thumbnail = video.get('thumbnail')
  74
  75         def extract_count(kind):
  76             return try_get(
  77                 video,
  78                 lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
  79
  80         like_count = extract_count('up')
  81         dislike_count = extract_count('down')
  82
  83         return {
  84             'id': video_id,
  85             'title': title,
  86             'description': description,
  87             'thumbnail': thumbnail,
  88             'like_count': like_count,
  89             'dislike_count': dislike_count,
  90             'formats': formats,
  91         }