youtube_dl/extractor/patreon.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import js_to_json
   6
   7
   8 class PatreonIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
  10     _TESTS = [
  11         {
  12             'url': 'http://www.patreon.com/creation?hid=743933',
  13             'md5': 'e25505eec1053a6e6813b8ed369875cc',
  14             'info_dict': {
  15                 'id': '743933',
  16                 'ext': 'mp3',
  17                 'title': 'Episode 166: David Smalley of Dogma Debate',
  18                 'uploader': 'Cognitive Dissonance Podcast',
  19                 'thumbnail': 're:^https?://.*$',
  20             },
  21         },
  22         {
  23             'url': 'http://www.patreon.com/creation?hid=754133',
  24             'md5': '3eb09345bf44bf60451b8b0b81759d0a',
  25             'info_dict': {
  26                 'id': '754133',
  27                 'ext': 'mp3',
  28                 'title': 'CD 167 Extra',
  29                 'uploader': 'Cognitive Dissonance Podcast',
  30                 'thumbnail': 're:^https?://.*$',
  31             },
  32         },
  33         {
  34             'url': 'https://www.patreon.com/creation?hid=1682498',
  35             'info_dict': {
  36                 'id': 'SU4fj_aEMVw',
  37                 'ext': 'mp4',
  38                 'title': 'I\'m on Patreon!',
  39                 'uploader': 'TraciJHines',
  40                 'thumbnail': 're:^https?://.*$',
  41                 'upload_date': '20150211',
  42                 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
  43                 'uploader_id': 'TraciJHines',
  44             },
  45             'params': {
  46                 'noplaylist': True,
  47                 'skip_download': True,
  48             }
  49         }
  50     ]
  51
  52     # Currently Patreon exposes download URL via hidden CSS, so login is not
  53     # needed. Keeping this commented for when this inevitably changes.
  54     '''
  55     def _login(self):
  56         (username, password) = self._get_login_info()
  57         if username is None:
  58             return
  59
  60         login_form = {
  61             'redirectUrl': 'http://www.patreon.com/',
  62             'email': username,
  63             'password': password,
  64         }
  65
  66         request = sanitized_Request(
  67             'https://www.patreon.com/processLogin',
  68             compat_urllib_parse_urlencode(login_form).encode('utf-8')
  69         )
  70         login_page = self._download_webpage(request, None, note='Logging in')
  71
  72         if re.search(r'onLoginFailed', login_page):
  73             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
  74
  75     def _real_initialize(self):
  76         self._login()
  77     '''
  78
  79     def _real_extract(self, url):
  80         video_id = self._match_id(url)
  81         webpage = self._download_webpage(url, video_id)
  82         title = self._og_search_title(webpage).strip()
  83
  84         attach_fn = self._html_search_regex(
  85             r'<div class="attach"><a target="_blank" href="([^"]+)">',
  86             webpage, 'attachment URL', default=None)
  87         embed = self._html_search_regex(
  88             r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
  89             webpage, 'embedded URL', default=None)
  90
  91         if attach_fn is not None:
  92             video_url = 'http://www.patreon.com' + attach_fn
  93             thumbnail = self._og_search_thumbnail(webpage)
  94             uploader = self._html_search_regex(
  95                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
  96         elif embed is not None:
  97             return self.url_result(embed)
  98         else:
  99             playlist = self._parse_json(self._search_regex(
 100                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
 101                 webpage, 'playlist JSON'),
 102                 video_id, transform_source=js_to_json)
 103             data = playlist[0]
 104             video_url = self._proto_relative_url(data['mp3'])
 105             thumbnail = self._proto_relative_url(data.get('cover'))
 106             uploader = data.get('artist')
 107
 108         return {
 109             'id': video_id,
 110             'url': video_url,
 111             'ext': 'mp3',
 112             'title': title,
 113             'uploader': uploader,
 114             'thumbnail': thumbnail,
 115         }