]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/vk.py
[jwplatform] Improve duration extraction
[youtube-dl.git] / youtube_dl / extractor / vk.py
index 3cfbd97af48d1bd8028394e5cbde00dba2c34df6..1990e7093acabb2dce11faebfddd220e8d88392b 100644 (file)
@@ -1,9 +1,8 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import collections
 import re
-import json
 import sys
 
 from .common import InfoExtractor
@@ -20,7 +19,7 @@ from ..utils import (
     remove_start,
     str_to_int,
     unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -106,6 +105,7 @@ class VKIE(VKBaseIE):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
+                'timestamp': 1329060660,
                 'upload_date': '20120212',
                 'view_count': int,
             },
@@ -119,6 +119,7 @@ class VKIE(VKBaseIE):
                 'uploader': 'Tom Cruise',
                 'title': 'No name',
                 'duration': 9,
+                'timestamp': 1374374880,
                 'upload_date': '20130721',
                 'view_count': int,
             }
@@ -195,6 +196,7 @@ class VKIE(VKBaseIE):
                 'upload_date': '20150709',
                 'view_count': int,
             },
+            'skip': 'Removed',
         },
         {
             # youtube embed
@@ -237,6 +239,7 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
+                'timestamp': 1454870100,
                 'upload_date': '20160207',
                 'view_count': int,
             },
@@ -251,6 +254,7 @@ class VKIE(VKBaseIE):
                 'title': 'ИгроМир 2016 — день 1',
                 'uploader': 'Игромания',
                 'duration': 5239,
+                'view_count': int,
             },
         },
         {
@@ -336,7 +340,7 @@ class VKIE(VKBaseIE):
         if youtube_url:
             return self.url_result(youtube_url, 'Youtube')
 
-        vimeo_url = VimeoIE._extract_vimeo_url(url, info_page)
+        vimeo_url = VimeoIE._extract_url(url, info_page)
         if vimeo_url is not None:
             return self.url_result(vimeo_url)
 
@@ -364,21 +368,31 @@ class VKIE(VKBaseIE):
                     opts_url = 'http:' + opts_url
                 return self.url_result(opts_url)
 
-        data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
-        data = json.loads(data_json)
+        # vars does not look to be served anymore since 24.10.2016
+        data = self._parse_json(
+            self._search_regex(
+                r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
+            video_id, fatal=False)
+
+        # <!json> is served instead
+        if not data:
+            data = self._parse_json(
+                self._search_regex(
+                    r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
+                video_id)['player']['params'][0]
 
         title = unescapeHTML(data['md_title'])
 
         if data.get('live') == 2:
             title = self._live_title(title)
 
-        # Extract upload date
-        upload_date = unified_strdate(self._html_search_regex(
-            r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None))
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+            'upload date', fatal=False))
 
-        view_count = str_to_int(self._html_search_regex(
-            r'class="mv_views_count[^>]*>([\d,.]+)',
-            info_page, 'view count', default=None))
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+            info_page, 'view count', fatal=False))
 
         formats = []
         for format_id, format_url in data.items():
@@ -411,7 +425,7 @@ class VKIE(VKBaseIE):
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
             'duration': data.get('duration'),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'view_count': view_count,
         }