[gamekings] Improve extraction

author Sergey M․ <dstftw@gmail.com>
Sun, 8 Feb 2015 17:03:12 +0000 (23:03 +0600)
committer Sergey M․ <dstftw@gmail.com>
Sun, 8 Feb 2015 17:03:12 +0000 (23:03 +0600)
diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py
index 1821f26d8c2da74e14b8dbaa498c501dd23ffb01..929779f60c2d1a847d77d46b20bf8ebbfff31bfc 100644
--- a/youtube_dl/extractor/gamekings.py
+++ b/youtube_dl/extractor/gamekings.py
@@ -1,70 +1,64 @@
+# coding: utf-8
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..utils import (
      xpath_text,
-    xpath_with_ns
- )
+    xpath_with_ns,
+)
  
  
  class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
-    _TESTS = [
-        {
+    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<id>[^/]+)'
+    _TESTS = [{
          'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
          # MD5 is flaky, seems to change regularly
          # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
          'info_dict': {
-            'id': '20130811',
+            'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
              'ext': 'mp4',
              'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
              'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
-            }
+            'thumbnail': 're:^https?://.*\.jpg$',
          },
-        {
+    }, {
+        # vimeo video
          'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
+        'md5': '12bf04dfd238e70058046937657ea68d',
          'info_dict': {
-            'id': '118933752',
+            'id': 'the-legend-of-zelda-majoras-mask',
              'ext': 'mp4',
              'title': 'The Legend of Zelda: Majora’s Mask',
-            'description': 'md5:9917825fe0e9f4057601fe1e38860de3'
-            }
-        }
-    ]
+            'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }]
  
      def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
  
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        webpage = self._download_webpage(url, name)
+        playlist_id = self._search_regex(
+            r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
  
-        playlist_id = re.search(r'(?:gogoVideo)\(\d+,"?(?P<playlist_id>.*)"', webpage, re.MULTILINE).group('playlist_id')
-        playlist_url = 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=' + playlist_id
-        playlist_rss = self._download_xml(playlist_url, playlist_id)
-        
+        playlist = self._download_xml(
+            'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
+            video_id)
  
          NS_MAP = {
              'jwplayer': 'http://rss.jwpcdn.com/'
-         }
+        }
+
+        item = playlist.find('./channel/item')
  
-        item = playlist_rss.find('./channel/item')
-        
-        image = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'image')
-        file_node = item.find(xpath_with_ns('./jwplayer:source', NS_MAP))
-        
-        video_url = file_node.get('file')
-        video = re.search(r'[0-9]+', video_url)
-        video_id = video.group(0)
-        
-        # Todo: Add medium format
+        thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
+        video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
  
          return {
              'id': video_id,
-            'ext': 'mp4',
              'url': video_url,
              'title': self._og_search_title(webpage),
              'description': self._og_search_description(webpage),
-            'thumbnail': image
+            'thumbnail': thumbnail,
          }
author	Sergey M․ <dstftw@gmail.com>
	Sun, 8 Feb 2015 17:03:12 +0000 (23:03 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Sun, 8 Feb 2015 17:03:12 +0000 (23:03 +0600)