youtube_dl/extractor/adultswim.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6
   7 from .turner import TurnerBaseIE
   8 from ..utils import (
   9     determine_ext,
  10     float_or_none,
  11     int_or_none,
  12     mimetype2ext,
  13     parse_age_limit,
  14     parse_iso8601,
  15     strip_or_none,
  16     try_get,
  17 )
  18
  19
  20 class AdultSwimIE(TurnerBaseIE):
  21     _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
  22
  23     _TESTS = [{
  24         'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
  25         'info_dict': {
  26             'id': 'rQxZvXQ4ROaSOqq-or2Mow',
  27             'ext': 'mp4',
  28             'title': 'Rick and Morty - Pilot',
  29             'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
  30             'timestamp': 1543294800,
  31             'upload_date': '20181127',
  32         },
  33         'params': {
  34             # m3u8 download
  35             'skip_download': True,
  36         },
  37         'expected_warnings': ['Unable to download f4m manifest'],
  38     }, {
  39         'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
  40         'info_dict': {
  41             'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
  42             'ext': 'mp4',
  43             'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
  44             'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
  45             'upload_date': '20080124',
  46             'timestamp': 1201150800,
  47         },
  48         'params': {
  49             # m3u8 download
  50             'skip_download': True,
  51         },
  52         'skip': '404 Not Found',
  53     }, {
  54         'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
  55         'info_dict': {
  56             'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
  57             'ext': 'mp4',
  58             'title': 'Decker - Inside Decker: A New Hero',
  59             'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
  60             'timestamp': 1469480460,
  61             'upload_date': '20160725',
  62         },
  63         'params': {
  64             # m3u8 download
  65             'skip_download': True,
  66         },
  67         'expected_warnings': ['Unable to download f4m manifest'],
  68     }, {
  69         'url': 'http://www.adultswim.com/videos/attack-on-titan',
  70         'info_dict': {
  71             'id': 'attack-on-titan',
  72             'title': 'Attack on Titan',
  73             'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
  74         },
  75         'playlist_mincount': 12,
  76     }, {
  77         'url': 'http://www.adultswim.com/videos/streams/williams-stream',
  78         'info_dict': {
  79             'id': 'd8DEBj7QRfetLsRgFnGEyg',
  80             'ext': 'mp4',
  81             'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
  82             'description': 'original programming',
  83         },
  84         'params': {
  85             # m3u8 download
  86             'skip_download': True,
  87         },
  88         'skip': '404 Not Found',
  89     }]
  90
  91     def _real_extract(self, url):
  92         show_path, episode_path = re.match(self._VALID_URL, url).groups()
  93         display_id = episode_path or show_path
  94         query = '''query {
  95   getShowBySlug(slug:"%s") {
  96     %%s
  97   }
  98 }''' % show_path
  99         if episode_path:
 100             query = query % '''title
 101     getVideoBySlug(slug:"%s") {
 102       _id
 103       auth
 104       description
 105       duration
 106       episodeNumber
 107       launchDate
 108       mediaID
 109       seasonNumber
 110       poster
 111       title
 112       tvRating
 113     }''' % episode_path
 114             ['getVideoBySlug']
 115         else:
 116             query = query % '''metaDescription
 117     title
 118     videos(first:1000,sort:["episode_number"]) {
 119       edges {
 120         node {
 121            _id
 122            slug
 123         }
 124       }
 125     }'''
 126         show_data = self._download_json(
 127             'https://www.adultswim.com/api/search', display_id,
 128             data=json.dumps({'query': query}).encode(),
 129             headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
 130         if episode_path:
 131             video_data = show_data['getVideoBySlug']
 132             video_id = video_data['_id']
 133             episode_title = title = video_data['title']
 134             series = show_data.get('title')
 135             if series:
 136                 title = '%s - %s' % (series, title)
 137             info = {
 138                 'id': video_id,
 139                 'title': title,
 140                 'description': strip_or_none(video_data.get('description')),
 141                 'duration': float_or_none(video_data.get('duration')),
 142                 'formats': [],
 143                 'subtitles': {},
 144                 'age_limit': parse_age_limit(video_data.get('tvRating')),
 145                 'thumbnail': video_data.get('poster'),
 146                 'timestamp': parse_iso8601(video_data.get('launchDate')),
 147                 'series': series,
 148                 'season_number': int_or_none(video_data.get('seasonNumber')),
 149                 'episode': episode_title,
 150                 'episode_number': int_or_none(video_data.get('episodeNumber')),
 151             }
 152
 153             auth = video_data.get('auth')
 154             media_id = video_data.get('mediaID')
 155             if media_id:
 156                 info.update(self._extract_ngtv_info(media_id, {
 157                     # CDN_TOKEN_APP_ID from:
 158                     # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
 159                     'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
 160                 }, {
 161                     'url': url,
 162                     'site_name': 'AdultSwim',
 163                     'auth_required': auth,
 164                 }))
 165
 166             if not auth:
 167                 extract_data = self._download_json(
 168                     'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
 169                     video_id, query={'fields': 'stream'}, fatal=False) or {}
 170                 assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
 171                 for asset in assets:
 172                     asset_url = asset.get('url')
 173                     if not asset_url:
 174                         continue
 175                     ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
 176                     if ext == 'm3u8':
 177                         info['formats'].extend(self._extract_m3u8_formats(
 178                             asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
 179                     elif ext == 'f4m':
 180                         continue
 181                         # info['formats'].extend(self._extract_f4m_formats(
 182                         #     asset_url, video_id, f4m_id='hds', fatal=False))
 183                     elif ext in ('scc', 'ttml', 'vtt'):
 184                         info['subtitles'].setdefault('en', []).append({
 185                             'url': asset_url,
 186                         })
 187             self._sort_formats(info['formats'])
 188
 189             return info
 190         else:
 191             entries = []
 192             for edge in show_data.get('videos', {}).get('edges', []):
 193                 video = edge.get('node') or {}
 194                 slug = video.get('slug')
 195                 if not slug:
 196                     continue
 197                 entries.append(self.url_result(
 198                     'http://adultswim.com/videos/%s/%s' % (show_path, slug),
 199                     'AdultSwim', video.get('_id')))
 200             return self.playlist_result(
 201                 entries, show_path, show_data.get('title'),
 202                 strip_or_none(show_data.get('metaDescription')))