]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/vshare.py
[cbs] extract smpte and vtt subtitles
[youtube-dl.git] / youtube_dl / extractor / vshare.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_chr
8 from ..utils import (
9     decode_packed_codes,
10     ExtractorError,
11 )
12
13
class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Handles both the ``/d/<id>`` and ``/v/<id>`` URL forms.  The player page
    carries its ``<video>`` markup inside an obfuscated ``eval(function...``
    script, which is decoded by :meth:`_extract_packed`.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every vshare.io ``/v/<id>`` URL used as an ``<iframe>`` src.

        :param webpage: HTML text to scan.
        :returns: list of matched URL strings (scheme-relative ``//...``
            URLs are returned as found); empty list when nothing matches.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup embedded in the player page.

        The page contains an ``eval(function...`` script.  Once run through
        ``decode_packed_codes`` it exposes a bracketed array of integers and
        a ``fromCharCode(... <key>)}`` expression; each integer minus the
        key is the code point of one character of the hidden markup.

        :param webpage: HTML text of the player page.
        :returns: the decoded string.
        :raises: whatever ``_search_regex`` raises when the packed code, the
            integer array or the key cannot be found.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)
        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # The key is the same for every element, so convert it only once.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))
        # The pattern above tolerates a trailing comma ("1,2,3,"); picking
        # the numbers out with findall avoids the empty field (and the
        # resulting int('') ValueError) that split(',') would produce.
        return ''.join(
            compat_chr(int(digit) - key)
            for digit in re.findall(r'\d+', digits))

    def _real_extract(self, url):
        """Extract the info dict for the video at *url*.

        :param url: a URL matching ``_VALID_URL``.
        :returns: the first entry parsed from the decoded ``<video>``
            markup, updated with ``id`` and ``title``.
        :raises ExtractorError: when the page shows an ``xxx-error`` block
            (raised as an expected error carrying the site's message) or
            when no media entry can be parsed from the decoded markup.
        """
        video_id = self._match_id(url)

        # Always fetch the embed page, whichever URL form was given.
        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Look for a site-reported error before any mandatory field: an
        # error page without a usable <title> should surface the site's
        # own message rather than a generic "unable to extract title".
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first ' - '.
        title = title.split(' - ')[0]

        entries = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)
        # NOTE(review): assumes the helper returns an empty sequence when
        # nothing is parsed; fail with a clear message instead of a bare
        # IndexError in that case.
        if not entries:
            raise ExtractorError('Unable to extract video data')
        info = entries[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
        })

        return info