youtube_dl/extractor/vshare.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_chr
   8 from ..utils import (
   9     decode_packed_codes,
  10     ExtractorError,
  11 )
  12
  13
  14 class VShareIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
  16     _TESTS = [{
  17         'url': 'https://vshare.io/d/0f64ce6',
  18         'md5': '17b39f55b5497ae8b59f5fbce8e35886',
  19         'info_dict': {
  20             'id': '0f64ce6',
  21             'title': 'vl14062007715967',
  22             'ext': 'mp4',
  23         }
  24     }, {
  25         'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
  26         'only_matching': True,
  27     }]
  28
  29     @staticmethod
  30     def _extract_urls(webpage):
  31         return re.findall(
  32             r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
  33             webpage)
  34
  35     def _extract_packed(self, webpage):
  36         packed = self._search_regex(
  37             r'(eval\(function.+)', webpage, 'packed code')
  38         unpacked = decode_packed_codes(packed)
  39         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
  40         digits = [int(digit) for digit in digits.split(',')]
  41         key_digit = self._search_regex(
  42             r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
  43         chars = [compat_chr(d - int(key_digit)) for d in digits]
  44         return ''.join(chars)
  45
  46     def _real_extract(self, url):
  47         video_id = self._match_id(url)
  48
  49         webpage = self._download_webpage(
  50             'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
  51             video_id, headers={'Referer': url})
  52
  53         title = self._html_search_regex(
  54             r'<title>([^<]+)</title>', webpage, 'title')
  55         title = title.split(' - ')[0]
  56
  57         error = self._html_search_regex(
  58             r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
  59             'error', default=None)
  60         if error:
  61             raise ExtractorError(error, expected=True)
  62
  63         info = self._parse_html5_media_entries(
  64             url, '<video>%s</video>' % self._extract_packed(webpage),
  65             video_id)[0]
  66
  67         self._sort_formats(info['formats'])
  68
  69         info.update({
  70             'id': video_id,
  71             'title': title,
  72         })
  73
  74         return info