]> gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
[generic] Try parsing JWPlayer embedded videos (closes #12030)
authorYen Chi Hsuan <yan12125@gmail.com>
Thu, 16 Feb 2017 15:42:36 +0000 (23:42 +0800)
committerYen Chi Hsuan <yan12125@gmail.com>
Thu, 16 Feb 2017 15:44:03 +0000 (23:44 +0800)
16 files changed:
ChangeLog
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/ondemandkorea.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornoxo.py
youtube_dl/extractor/rentv.py
youtube_dl/extractor/rudo.py
youtube_dl/extractor/screencastomatic.py
youtube_dl/extractor/sendtonews.py
youtube_dl/extractor/thisav.py
youtube_dl/extractor/tvnoe.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/wimp.py

index 8ef8a8307225731d9b9d738e4c826c71f9d3278e..4e69b03d0e940abc32fac2ad27ef279316c88d46 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
++ [generic] Support complex JWPlayer embedded videos (#12030)
+
+
 version 2017.02.16
 
 Core
index 486dff82d00a44a13384e3b7d8ff1b0189da8451..e21045bed9f7eb161d2dbacd02008e8ac11f9ec3 100644 (file)
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
     clean_html,
 )
 
 
-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
index 9681453ca397fa495d21b5a691701255fd18df33..f6ff56eda7b36136c564d48bd7ee0516d1729c65 100644 (file)
@@ -40,6 +40,7 @@ from ..utils import (
     fix_xml_ampersands,
     float_or_none,
     int_or_none,
+    js_to_json,
     parse_iso8601,
     RegexNotFoundError,
     sanitize_filename,
@@ -2073,6 +2074,123 @@ class InfoExtractor(object):
                     })
         return formats
 
+    @staticmethod
+    def _find_jwplayer_data(webpage):
+        mobj = re.search(
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            webpage)
+        if mobj:
+            return mobj.group('options')
+
+    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
+        return self._parse_jwplayer_data(
+            jwplayer_data, video_id, *args, **kwargs)
+
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        # JWPlayer backward compatibility: flattened playlists
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+        if 'playlist' not in jwplayer_data:
+            jwplayer_data = {'playlist': [jwplayer_data]}
+
+        entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+        for video_data in jwplayer_data['playlist']:
+            # JWPlayer backward compatibility: flattened sources
+            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+            if 'sources' not in video_data:
+                video_data['sources'] = [video_data]
+
+            this_video_id = video_id or video_data['mediaid']
+
+            formats = []
+            for source in video_data['sources']:
+                source_url = self._proto_relative_url(source['file'])
+                if base_url:
+                    source_url = compat_urlparse.urljoin(base_url, source_url)
+                source_type = source.get('type') or ''
+                ext = mimetype2ext(source_type) or determine_ext(source_url)
+                if source_type == 'hls' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
+                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                    formats.append({
+                        'url': source_url,
+                        'vcodec': 'none',
+                        'ext': ext,
+                    })
+                else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided but there is a label in
+                        # format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
+                    a_format = {
+                        'url': source_url,
+                        'width': int_or_none(source.get('width')),
+                        'height': height,
+                        'ext': ext,
+                    }
+                    if source_url.startswith('rtmp'):
+                        a_format['ext'] = 'flv'
+
+                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                        # of jwplayer.flash.swf
+                        rtmp_url_parts = re.split(
+                            r'((?:mp4|mp3|flv):)', source_url, 1)
+                        if len(rtmp_url_parts) == 3:
+                            rtmp_url, prefix, play_path = rtmp_url_parts
+                            a_format.update({
+                                'url': rtmp_url,
+                                'play_path': prefix + play_path,
+                            })
+                        if rtmp_params:
+                            a_format.update(rtmp_params)
+                    formats.append(a_format)
+            self._sort_formats(formats)
+
+            subtitles = {}
+            tracks = video_data.get('tracks')
+            if tracks and isinstance(tracks, list):
+                for track in tracks:
+                    if track.get('kind') != 'captions':
+                        continue
+                    track_url = urljoin(base_url, track.get('file'))
+                    if not track_url:
+                        continue
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track_url)
+                    })
+
+            entries.append({
+                'id': this_video_id,
+                'title': video_data['title'] if require_title else video_data.get('title'),
+                'description': video_data.get('description'),
+                'thumbnail': self._proto_relative_url(video_data.get('image')),
+                'timestamp': int_or_none(video_data.get('pubdate')),
+                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+                'subtitles': subtitles,
+                'formats': formats,
+            })
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries)
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
index a2b0298ecb915467ae221b4fb01c9febd4b6be04..3db31debea88aee29718a56cdc38109487dd9c8e 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
     float_or_none,
     HEADRequest,
     is_html,
+    js_to_json,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -961,6 +962,16 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        # Complex jwplayer
+        {
+            'url': 'http://www.indiedb.com/games/king-machine/videos',
+            'info_dict': {
+                'id': 'videos',
+                'ext': 'mp4',
+                'title': 'king machine trailer 1',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -2488,6 +2499,15 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(entry['formats'])
             return self.playlist_result(entries)
 
+        jwplayer_data_str = self._find_jwplayer_data(webpage)
+        if jwplayer_data_str:
+            try:
+                jwplayer_data = self._parse_json(
+                    jwplayer_data_str, video_id, transform_source=js_to_json)
+                return self._parse_jwplayer_data(jwplayer_data, video_id)
+            except ExtractorError:
+                pass
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
index aff7ab49a9500c8bdabe78fac393eb30ef827db5..33d55f7706d79e0b87a9830ae9abce3de6f33826 100644 (file)
@@ -4,139 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    determine_ext,
-    float_or_none,
-    int_or_none,
-    js_to_json,
-    mimetype2ext,
-    urljoin,
-)
 
 
-class JWPlatformBaseIE(InfoExtractor):
-    @staticmethod
-    def _find_jwplayer_data(webpage):
-        # TODO: Merge this with JWPlayer-related codes in generic.py
-
-        mobj = re.search(
-            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
-            webpage)
-        if mobj:
-            return mobj.group('options')
-
-    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
-        jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id,
-            transform_source=js_to_json)
-        return self._parse_jwplayer_data(
-            jwplayer_data, video_id, *args, **kwargs)
-
-    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
-                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
-        # JWPlayer backward compatibility: flattened playlists
-        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
-        if 'playlist' not in jwplayer_data:
-            jwplayer_data = {'playlist': [jwplayer_data]}
-
-        entries = []
-
-        # JWPlayer backward compatibility: single playlist item
-        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
-        if not isinstance(jwplayer_data['playlist'], list):
-            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
-        for video_data in jwplayer_data['playlist']:
-            # JWPlayer backward compatibility: flattened sources
-            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
-            if 'sources' not in video_data:
-                video_data['sources'] = [video_data]
-
-            this_video_id = video_id or video_data['mediaid']
-
-            formats = []
-            for source in video_data['sources']:
-                source_url = self._proto_relative_url(source['file'])
-                if base_url:
-                    source_url = compat_urlparse.urljoin(base_url, source_url)
-                source_type = source.get('type') or ''
-                ext = mimetype2ext(source_type) or determine_ext(source_url)
-                if source_type == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                elif ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                    formats.append({
-                        'url': source_url,
-                        'vcodec': 'none',
-                        'ext': ext,
-                    })
-                else:
-                    height = int_or_none(source.get('height'))
-                    if height is None:
-                        # Often no height is provided but there is a label in
-                        # format like 1080p.
-                        height = int_or_none(self._search_regex(
-                            r'^(\d{3,})[pP]$', source.get('label') or '',
-                            'height', default=None))
-                    a_format = {
-                        'url': source_url,
-                        'width': int_or_none(source.get('width')),
-                        'height': height,
-                        'ext': ext,
-                    }
-                    if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv'
-
-                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                        # of jwplayer.flash.swf
-                        rtmp_url_parts = re.split(
-                            r'((?:mp4|mp3|flv):)', source_url, 1)
-                        if len(rtmp_url_parts) == 3:
-                            rtmp_url, prefix, play_path = rtmp_url_parts
-                            a_format.update({
-                                'url': rtmp_url,
-                                'play_path': prefix + play_path,
-                            })
-                        if rtmp_params:
-                            a_format.update(rtmp_params)
-                    formats.append(a_format)
-            self._sort_formats(formats)
-
-            subtitles = {}
-            tracks = video_data.get('tracks')
-            if tracks and isinstance(tracks, list):
-                for track in tracks:
-                    if track.get('kind') != 'captions':
-                        continue
-                    track_url = urljoin(base_url, track.get('file'))
-                    if not track_url:
-                        continue
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track_url)
-                    })
-
-            entries.append({
-                'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
-                'description': video_data.get('description'),
-                'thumbnail': self._proto_relative_url(video_data.get('image')),
-                'timestamp': int_or_none(video_data.get('pubdate')),
-                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
-                'subtitles': subtitles,
-                'formats': formats,
-            })
-        if len(entries) == 1:
-            return entries[0]
-        else:
-            return self.playlist_result(entries)
-
-
-class JWPlatformIE(JWPlatformBaseIE):
+class JWPlatformIE(InfoExtractor):
     _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
     _TEST = {
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
index de1d6b08a409ef79272393ccd3f98607f436e4a3..dcd157777514022a120e9aa446a6299f64518251 100644 (file)
@@ -1,14 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     js_to_json,
 )
 
 
-class OnDemandKoreaIE(JWPlatformBaseIE):
+class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
index 7a2737032ff27a73825a7787feece5d52a92507d..9b413590a4078b9e962edc63912a85fbc8312523 100644 (file)
@@ -169,50 +169,6 @@ class PornHubIE(InfoExtractor):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
-        """
-        video_variables = {}
-        for video_variablename, quote, video_variable in re.findall(
-                r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
-            video_variables[video_variablename] = video_variable
-
-        video_urls = []
-        for encoded_video_url in re.findall(
-                r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
-            for varname, varval in video_variables.items():
-                encoded_video_url = encoded_video_url.replace(varname, varval)
-            video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
-
-        if webpage.find('"encrypted":true') != -1:
-            password = compat_urllib_parse_unquote_plus(
-                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
-            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
-
-        formats = []
-        for video_url in video_urls:
-            path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
-            format = path.split('/')[5].split('_')[:2]
-            format = '-'.join(format)
-
-            m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
-            if m is None:
-                height = None
-                tbr = None
-            else:
-                height = int(m.group('height'))
-                tbr = int(m.group('tbr'))
-
-            formats.append({
-                'url': video_url,
-                'ext': extension,
-                'format': format,
-                'format_id': format,
-                'tbr': tbr,
-                'height': height,
-            })
-        self._sort_formats(formats)
-        """
-
         page_params = self._parse_json(self._search_regex(
             r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
             webpage, 'page parameters', group='data', default='{}'),
index 1a0cce7e0274bb4a06bf9b0604d9ebdf75cf3df5..2831368b6a85e1aa7926c8721560f1e1dc8be5f1 100644 (file)
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     str_to_int,
 )
 
 
-class PornoXOIE(JWPlatformBaseIE):
+class PornoXOIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
index 422c02cff37620cbc52f7056f92987682f394c30..d338b3a933cf10fb50621ea4d785d4e83324466c 100644 (file)
@@ -2,11 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .jwplatform import JWPlatformBaseIE
 from ..compat import compat_str
 
 
-class RENTVIE(JWPlatformBaseIE):
+class RENTVIE(InfoExtractor):
     _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://ren.tv/video/epizod/118577',
index 3bfe934d82c9db7fe7ff8b1b8820d4c96ca0cb32..51644011e5d5587462ef56a0511767437d93b77e 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     get_element_by_class,
@@ -11,7 +11,7 @@ from ..utils import (
 )
 
 
-class RudoIE(JWPlatformBaseIE):
+class RudoIE(InfoExtractor):
     _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
 
     _TEST = {
index 94a2a37d20696fa3ffc65b6f1df04cab42c7785d..b5e76c9af04d0c98a1214ec3ce8255b7d6c7d4e6 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import js_to_json
 
 
-class ScreencastOMaticIE(JWPlatformBaseIE):
+class ScreencastOMaticIE(InfoExtractor):
     _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
     _TEST = {
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
index 9880a5a78c1f4b18d41c55e1899405dbdb98e7dc..9d9652949bb64ca2a15c02b3e0733dd9b7f42493 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     float_or_none,
     parse_iso8601,
@@ -14,7 +14,7 @@ from ..utils import (
 )
 
 
-class SendtoNewsIE(JWPlatformBaseIE):
+class SendtoNewsIE(InfoExtractor):
     _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
 
     _TEST = {
index 4473a3c773c3d9c4c26361e907769e8bb1ac9fad..b7b3568cb3f5de7f3eced52895f16bf069663086 100644 (file)
@@ -3,11 +3,11 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import remove_end
 
 
-class ThisAVIE(JWPlatformBaseIE):
+class ThisAVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
     _TESTS = [{
         'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
index 6d5c748264197ba99a2be8fefccf375761818610..1a5b76bf2ebd069bc36ec107c61adaa99c4c180c 100644 (file)
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     clean_html,
     get_element_by_class,
@@ -9,7 +9,7 @@ from ..utils import (
 )
 
 
-class TVNoeIE(JWPlatformBaseIE):
+class TVNoeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.tvnoe.cz/video/10362',
index 9950c62ad636ee4f03389bef627da4318f019c22..1f1828fce478d92fb4b3134f86eebb1cbb03760c 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     decode_packed_codes,
     js_to_json,
@@ -12,7 +12,7 @@ from ..utils import (
 )
 
 
-class VidziIE(JWPlatformBaseIE):
+class VidziIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
index 54eb5142793827f8b733592d22b979d326593bee..c022fb33e94ef7f9e6f0e90d73300f866e8ffc76 100644 (file)
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
+from .common import InfoExtractor
 from .youtube import YoutubeIE
-from .jwplatform import JWPlatformBaseIE
 
 
-class WimpIE(JWPlatformBaseIE):
+class WimpIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.wimp.com/maru-is-exhausted/',