youtube_dl/extractor/beeg.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import (
   5     compat_str,
   6     compat_urlparse,
   7 )
   8 from ..utils import (
   9     int_or_none,
  10     unified_timestamp,
  11 )
  12
  13
  14 class BeegIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
  16     _TESTS = [{
  17         # api/v6 v1
  18         'url': 'http://beeg.com/5416503',
  19         'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
  20         'info_dict': {
  21             'id': '5416503',
  22             'ext': 'mp4',
  23             'title': 'Sultry Striptease',
  24             'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
  25             'timestamp': 1391813355,
  26             'upload_date': '20140207',
  27             'duration': 383,
  28             'tags': list,
  29             'age_limit': 18,
  30         }
  31     }, {
  32         # api/v6 v2
  33         'url': 'https://beeg.com/1941093077?t=911-1391',
  34         'only_matching': True,
  35     }, {
  36         # api/v6 v2 w/o t
  37         'url': 'https://beeg.com/1277207756',
  38         'only_matching': True,
  39     }, {
  40         'url': 'https://beeg.porn/video/5416503',
  41         'only_matching': True,
  42     }, {
  43         'url': 'https://beeg.porn/5416503',
  44         'only_matching': True,
  45     }]
  46
  47     def _real_extract(self, url):
  48         video_id = self._match_id(url)
  49
  50         webpage = self._download_webpage(url, video_id)
  51
  52         beeg_version = self._search_regex(
  53             r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
  54             default='1546225636701')
  55
  56         if len(video_id) >= 10:
  57             query = {
  58                 'v': 2,
  59             }
  60             qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  61             t = qs.get('t', [''])[0].split('-')
  62             if len(t) > 1:
  63                 query.update({
  64                     's': t[0],
  65                     'e': t[1],
  66                 })
  67         else:
  68             query = {'v': 1}
  69
  70         for api_path in ('', 'api.'):
  71             video = self._download_json(
  72                 'https://%sbeeg.com/api/v6/%s/video/%s'
  73                 % (api_path, beeg_version, video_id), video_id,
  74                 fatal=api_path == 'api.', query=query)
  75             if video:
  76                 break
  77
  78         formats = []
  79         for format_id, video_url in video.items():
  80             if not video_url:
  81                 continue
  82             height = self._search_regex(
  83                 r'^(\d+)[pP]$', format_id, 'height', default=None)
  84             if not height:
  85                 continue
  86             formats.append({
  87                 'url': self._proto_relative_url(
  88                     video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
  89                 'format_id': format_id,
  90                 'height': int(height),
  91             })
  92         self._sort_formats(formats)
  93
  94         title = video['title']
  95         video_id = compat_str(video.get('id') or video_id)
  96         display_id = video.get('code')
  97         description = video.get('desc')
  98         series = video.get('ps_name')
  99
 100         timestamp = unified_timestamp(video.get('date'))
 101         duration = int_or_none(video.get('duration'))
 102
 103         tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
 104
 105         return {
 106             'id': video_id,
 107             'display_id': display_id,
 108             'title': title,
 109             'description': description,
 110             'series': series,
 111             'timestamp': timestamp,
 112             'duration': duration,
 113             'tags': tags,
 114             'formats': formats,
 115             'age_limit': self._rta_search(webpage),
 116         }