X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvk.py;h=1990e7093acabb2dce11faebfddd220e8d88392b;hb=51378d359e790b1c4462c0577fa48dda97dc4c01;hp=77f5cebcfb053823c61d3f1b578bb1e5e80af891;hpb=a7ee8a00f4af9853d06ed895c5023cc6b573fd57;p=youtube-dl.git diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 77f5cebcf..1990e7093 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,9 +1,8 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import collections import re -import json import sys from .common import InfoExtractor @@ -255,6 +254,7 @@ class VKIE(VKBaseIE): 'title': 'ИгроМир 2016 — день 1', 'uploader': 'Игромания', 'duration': 5239, + 'view_count': int, }, }, { @@ -340,7 +340,7 @@ class VKIE(VKBaseIE): if youtube_url: return self.url_result(youtube_url, 'Youtube') - vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: return self.url_result(vimeo_url) @@ -368,22 +368,31 @@ class VKIE(VKBaseIE): opts_url = 'http:' + opts_url return self.url_result(opts_url) - data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') - data = json.loads(data_json) + # vars does not look to be served anymore since 24.10.2016 + data = self._parse_json( + self._search_regex( + r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'), + video_id, fatal=False) + + # is served instead + if not data: + data = self._parse_json( + self._search_regex( + r'\s*({.+?})\s*', info_page, 'json'), + video_id)['player']['params'][0] title = unescapeHTML(data['md_title']) if data.get('live') == 2: title = self._live_title(title) - # Extract upload date timestamp = unified_timestamp(self._html_search_regex( - r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, 'upload date', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'class="mv_views_count[^>]*>([\d,.]+)', - info_page, 'view count', default=None)) + view_count = str_to_int(self._search_regex( + r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', + info_page, 'view count', fatal=False)) formats = [] for format_id, format_url in data.items():