gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Add support for http://www.spankwire.com
author: rzhxeo <rzhxeot7z81b4700@mailcatch.com>
Sat, 26 Oct 2013 23:59:26 +0000 (01:59 +0200)
committer: rzhxeo <rzhxeot7z81b4700@mailcatch.com>
Sat, 26 Oct 2013 23:59:26 +0000 (01:59 +0200)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/spankwire.py [new file with mode: 0644]

index db69af361929fd7ff726d1a1df980730cad3630c..7a60e09377d44fba40172980f624639f177c1db6 100644 (file)
@@ -109,6 +109,7 @@ from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
+from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
new file mode 100644 (file)
index 0000000..f0d5009
--- /dev/null
@@ -0,0 +1,70 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class SpankwireIE(InfoExtractor):
+    """Information extractor for spankwire.com video pages.
+
+    Metadata is scraped from the page HTML and the stream URLs are taken
+    from the ``flashvars.quality_NNNp`` assignments embedded in it.
+    """
+
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+        u'file': u'103545.mp4',
+        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
+        u'info_dict': {
+            u"uploader": u"oreusz",
+            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
+            u"description": u"Crazy Bitch X rated music video.",
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the video page for *url* and build its info dict.
+
+        Returns a dict with the keys ``id``, ``uploader``, ``title``,
+        ``thumbnail``, ``description`` and ``formats``; ``formats`` is a
+        list of dicts with ``url``, ``ext``, ``format`` and ``format_id``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        # Rebuild the URL in the http://www. form, whatever scheme/host
+        # prefix the caller supplied.
+        url = 'http://www.' + mobj.group('url')
+
+        # The request carries an age_verified cookie
+        # (presumably to bypass the site's age gate -- confirm).
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+        description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
+        # A non-fatal search may come back without a value (presumably None
+        # -- confirm against InfoExtractor); calling len() on that would
+        # raise, so test truthiness and normalise empty results to None.
+        if not description:
+            description = None
+
+        video_urls = [
+            compat_urllib_parse.unquote(quoted)
+            for quoted in re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)
+        ]
+        # Plain substring test. The marker must not contain a regex-style
+        # backslash: with one, the literal text never occurs in the page and
+        # the decryption branch below could never run.
+        if 'flashvars.encrypted = "true"' in webpage:
+            # The video title from the flashvars (with '+' turned into
+            # spaces) is used as the AES password.
+            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+            video_urls = [
+                aes_decrypt_text(encrypted, password, 32).decode('utf-8')
+                for encrypted in video_urls
+            ]
+
+        formats = []
+        for video_url in video_urls:
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
+            # NOTE(review): assumes the URL path has at least five
+            # '/'-separated segments and that the fifth starts with
+            # "<a>_<b>_..." -- verify against current stream URLs.
+            format_id = "-".join(path.split('/')[4].split('_')[:2])
+            formats.append({
+                'url': video_url,
+                'ext': extension,
+                'format': format_id,
+                'format_id': format_id,
+            })
+        # Zero-pad every '-'-separated component so that numeric parts
+        # compare by value rather than lexicographically (e.g. 480 < 1080).
+        formats.sort(key=lambda fmt: [part.zfill(6) for part in fmt['format'].split('-')])
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+        }