youtube_dl/extractor/dtube.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6 from socket import timeout
   7
   8 from .common import InfoExtractor
   9 from ..utils import (
  10     int_or_none,
  11     parse_iso8601,
  12 )
  13
  14
  15 class DTubeIE(InfoExtractor):
  16     _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
  17     _TEST = {
  18         'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
  19         'md5': '9f29088fa08d699a7565ee983f56a06e',
  20         'info_dict': {
  21             'id': 'x380jtr1',
  22             'ext': 'mp4',
  23             'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
  24             'description': 'md5:60be222088183be3a42f196f34235776',
  25             'uploader_id': 'broncnutz',
  26             'upload_date': '20190107',
  27             'timestamp': 1546854054,
  28         },
  29         'params': {
  30             'format': '480p',
  31         },
  32     }
  33
  34     def _real_extract(self, url):
  35         uploader_id, video_id = re.match(self._VALID_URL, url).groups()
  36         result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
  37             'jsonrpc': '2.0',
  38             'method': 'get_content',
  39             'params': [uploader_id, video_id],
  40         }).encode())['result']
  41
  42         metadata = json.loads(result['json_metadata'])
  43         video = metadata['video']
  44         content = video['content']
  45         info = video.get('info', {})
  46         title = info.get('title') or result['title']
  47
  48         def canonical_url(h):
  49             if not h:
  50                 return None
  51             return 'https://video.dtube.top/ipfs/' + h
  52
  53         formats = []
  54         for q in ('240', '480', '720', '1080', ''):
  55             video_url = canonical_url(content.get('video%shash' % q))
  56             if not video_url:
  57                 continue
  58             format_id = (q + 'p') if q else 'Source'
  59             try:
  60                 self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
  61                 self._downloader._opener.open(video_url, timeout=5).close()
  62             except timeout:
  63                 self.to_screen(
  64                     '%s: %s URL is invalid, skipping' % (video_id, format_id))
  65                 continue
  66             formats.append({
  67                 'format_id': format_id,
  68                 'url': video_url,
  69                 'height': int_or_none(q),
  70                 'ext': 'mp4',
  71             })
  72
  73         return {
  74             'id': video_id,
  75             'title': title,
  76             'description': content.get('description'),
  77             'thumbnail': canonical_url(info.get('snaphash')),
  78             'tags': content.get('tags') or metadata.get('tags'),
  79             'duration': info.get('duration'),
  80             'formats': formats,
  81             'timestamp': parse_iso8601(result.get('created')),
  82             'uploader_id': uploader_id,
  83         }