youtube_dl/extractor/cammodels.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     int_or_none,
   8     url_or_none,
   9 )
  10
  11
  12 class CamModelsIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
  14     _TESTS = [{
  15         'url': 'https://www.cammodels.com/cam/AutumnKnight/',
  16         'only_matching': True,
  17         'age_limit': 18
  18     }]
  19
  20     def _real_extract(self, url):
  21         user_id = self._match_id(url)
  22
  23         webpage = self._download_webpage(
  24             url, user_id, headers=self.geo_verification_headers())
  25
  26         manifest_root = self._html_search_regex(
  27             r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
  28
  29         if not manifest_root:
  30             ERRORS = (
  31                 ("I'm offline, but let's stay connected", 'This user is currently offline'),
  32                 ('in a private show', 'This user is in a private show'),
  33                 ('is currently performing LIVE', 'This model is currently performing live'),
  34             )
  35             for pattern, message in ERRORS:
  36                 if pattern in webpage:
  37                     error = message
  38                     expected = True
  39                     break
  40             else:
  41                 error = 'Unable to find manifest URL root'
  42                 expected = False
  43             raise ExtractorError(error, expected=expected)
  44
  45         manifest = self._download_json(
  46             '%s%s.json' % (manifest_root, user_id), user_id)
  47
  48         formats = []
  49         for format_id, format_dict in manifest['formats'].items():
  50             if not isinstance(format_dict, dict):
  51                 continue
  52             encodings = format_dict.get('encodings')
  53             if not isinstance(encodings, list):
  54                 continue
  55             vcodec = format_dict.get('videoCodec')
  56             acodec = format_dict.get('audioCodec')
  57             for media in encodings:
  58                 if not isinstance(media, dict):
  59                     continue
  60                 media_url = url_or_none(media.get('location'))
  61                 if not media_url:
  62                     continue
  63
  64                 format_id_list = [format_id]
  65                 height = int_or_none(media.get('videoHeight'))
  66                 if height is not None:
  67                     format_id_list.append('%dp' % height)
  68                 f = {
  69                     'url': media_url,
  70                     'format_id': '-'.join(format_id_list),
  71                     'width': int_or_none(media.get('videoWidth')),
  72                     'height': height,
  73                     'vbr': int_or_none(media.get('videoKbps')),
  74                     'abr': int_or_none(media.get('audioKbps')),
  75                     'fps': int_or_none(media.get('fps')),
  76                     'vcodec': vcodec,
  77                     'acodec': acodec,
  78                 }
  79                 if 'rtmp' in format_id:
  80                     f['ext'] = 'flv'
  81                 elif 'hls' in format_id:
  82                     f.update({
  83                         'ext': 'mp4',
  84                         # hls skips fragments, preferring rtmp
  85                         'preference': -1,
  86                     })
  87                 else:
  88                     continue
  89                 formats.append(f)
  90         self._sort_formats(formats)
  91
  92         return {
  93             'id': user_id,
  94             'title': self._live_title(user_id),
  95             'is_live': True,
  96             'formats': formats,
  97             'age_limit': 18
  98         }