]> gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
Merge pull request #2282 from dstftw/lifenews
authorJaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Fri, 31 Jan 2014 18:23:46 +0000 (10:23 -0800)
committerJaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Fri, 31 Jan 2014 18:23:46 +0000 (10:23 -0800)
[lifenews] Add support for lifenews.ru and fix og content extraction regex

README.md
youtube-dl.plugin.zsh
youtube_dl/extractor/collegehumor.py
youtube_dl/extractor/ro220.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vevo.py

index d795ef6f28aa8a3cbc9e7d6c21ce3bc6958aa8de..5200f59fe7b5da4bd5a51374f5eda27afdf4e756 100644 (file)
--- a/README.md
+++ b/README.md
@@ -325,7 +325,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
 
 To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 
-# BUILD INSTRUCTIONS
+# DEVELOPER INSTRUCTIONS
 
 Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 
@@ -347,6 +347,10 @@ If you want to create a build of youtube-dl yourself, you'll need
 * zip
 * nosetests
 
+### Adding support for a new site
+
+If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+
 # BUGS
 
 Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
index 2a1d7527e3b4eb3dee6c7c27ddb1ea56f2a8b69b..4edab5214a415f04659904962dbd4f4dcef580d8 100644 (file)
@@ -18,6 +18,7 @@
 # code is documented here:
 # https://github.com/zsh-users/antigen#notes-on-writing-plugins
 
-# This specific script just adds the downloaded folder to the end of the $PATH,
-# which allows the contained youtube-dl executable to be found.
-export PATH=${PATH}:$(dirname $0)
+# This specific script just aliases youtube-dl to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
index 026d023a0801190b05b721edabfd426ab2f6cafa..1db27026ac7116f5b228843a96ba8861b4df6def 100644 (file)
@@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
             'uploader': 'Funnyplox TV',
             'uploader_id': 'funnyploxtv',
-            'description': 'md5:b20fc87608e2837596bbc8df85a3c34d',
+            'description': 'md5:7e8899d3f749db50fa089eb243cba17f',
             'upload_date': '20140128',
         },
         'params': {
index c32f64d99791dbc4d866ad0a2bc681fa5ae3609d..4678f62dfadba9968ff363a919471c869eb35c71 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
     IE_NAME = '220.ro'
     _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
     _TEST = {
-        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
-        u'file': u'LYV6doKo7f.mp4',
-        u'md5': u'03af18b73a07b4088753930db7a34add',
-        u'info_dict': {
-            u"title": u"Luati-le Banii sez 4 ep 1",
-            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+        "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+        'file': 'LYV6doKo7f.mp4',
+        'md5': '03af18b73a07b4088753930db7a34add',
+        'info_dict': {
+            "title": "Luati-le Banii sez 4 ep 1",
+            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
         }
     }
 
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         flashVars_str = self._search_regex(
             r'<param name="flashVars" value="([^"]+)"',
-            webpage, u'flashVars')
+            webpage, 'flashVars')
         flashVars = compat_parse_qs(flashVars_str)
 
-        info = {
+        return {
             '_type': 'video',
             'id': video_id,
             'ext': 'mp4',
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
             'description': clean_html(flashVars['desc'][0]),
             'thumbnail': flashVars['preview'][0],
         }
-        return info
index 051a34d5b8b048db9112ac2df5f3f04115447a87..9156d7fafd6ac2688ab329b31f1b683ab868def0 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -6,20 +8,20 @@ from .common import InfoExtractor
 class SpiegelIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
     _TESTS = [{
-        u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
-        u'file': u'1259285.mp4',
-        u'md5': u'2c2754212136f35fb4b19767d242f66e',
-        u'info_dict': {
-            u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
-        }
+        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+        'file': '1259285.mp4',
+        'md5': '2c2754212136f35fb4b19767d242f66e',
+        'info_dict': {
+            'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+        },
     },
     {
-        u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
-        u'file': u'1309159.mp4',
-        u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
-        u'info_dict': {
-            u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
-        }
+        'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+        'file': '1309159.mp4',
+        'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+        'info_dict': {
+            'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+        },
     }]
 
     def _real_extract(self, url):
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(
-            r'<div class="module-title">(.*?)</div>', webpage, u'title')
+            r'<div class="module-title">(.*?)</div>', webpage, 'title')
 
-        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+        xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
         idoc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading XML', errnote=u'Failed to download XML')
+            note='Downloading XML', errnote='Failed to download XML')
 
         formats = [
             {
                 'format_id': n.tag.rpartition('type')[2],
-                'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+                'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
                 'width': int(n.find('./width').text),
                 'height': int(n.find('./height').text),
                 'abr': int(n.find('./audiobitrate').text),
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        info = {
+        return {
             'id': video_id,
             'title': video_title,
             'duration': duration,
             'formats': formats,
         }
-        return info
index 74c82587f802acc82e3025235a4bfe92725e8111..7fa2b9e159ed1a60c056140f05f51851663830e9 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re
 
@@ -10,48 +12,48 @@ from ..utils import (
 
 class UstreamIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
-    IE_NAME = u'ustream'
+    IE_NAME = 'ustream'
     _TEST = {
-        u'url': u'http://www.ustream.tv/recorded/20274954',
-        u'file': u'20274954.flv',
-        u'md5': u'088f151799e8f572f84eb62f17d73e5c',
-        u'info_dict': {
-            u"uploader": u"Young Americans for Liberty", 
-            u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
-        }
+        'url': 'http://www.ustream.tv/recorded/20274954',
+        'file': '20274954.flv',
+        'md5': '088f151799e8f572f84eb62f17d73e5c',
+        'info_dict': {
+            "uploader": "Young Americans for Liberty",
+            "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+        },
     }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')
 
-        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
+        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
         self.report_extraction(video_id)
 
         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
-            webpage, u'title')
+            webpage, 'title')
 
         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
+            webpage, 'uploader', fatal=False, flags=re.DOTALL)
 
         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
-            webpage, u'thumbnail', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'flv',
-                'title': video_title,
-                'uploader': uploader,
-                'thumbnail': thumbnail,
-               }
-        return info
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+        }
+
 
 class UstreamChannelIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
-    IE_NAME = u'ustream:channel'
+    IE_NAME = 'ustream:channel'
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
index a4b26a26f4132840c57700fad96785dfb390a8db..f0673972c4632248a367c5ed5ca3de3a1f0a093b 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 import xml.etree.ElementTree
@@ -22,16 +24,16 @@ class VevoIE(InfoExtractor):
            vevo:)
         (?P<id>[^&?#]+)'''
     _TESTS = [{
-        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
-        u'file': u'GB1101300280.mp4',
-        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
-        u'info_dict': {
-            u"upload_date": u"20130624",
-            u"uploader": u"Hurts",
-            u"title": u"Somebody to Die For",
-            u"duration": 230.12,
-            u"width": 1920,
-            u"height": 1080,
+        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+        'file': 'GB1101300280.mp4',
+        "md5": "06bea460acb744eab74a9d7dcb4bfd61",
+        'info_dict': {
+            "upload_date": "20130624",
+            "uploader": "Hurts",
+            "title": "Somebody to Die For",
+            "duration": 230.12,
+            "width": 1920,
+            "height": 1080,
         }
     }]
     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -44,7 +46,7 @@ class VevoIE(InfoExtractor):
                 if version['version'] > last_version['version']:
                     last_version = version
         if last_version['version'] == -1:
-            raise ExtractorError(u'Unable to extract last version of the video')
+            raise ExtractorError('Unable to extract last version of the video')
 
         renditions = xml.etree.ElementTree.fromstring(last_version['data'])
         formats = []
@@ -85,7 +87,7 @@ class VevoIE(InfoExtractor):
             format_url = self._SMIL_BASE_URL + m.group('path')
             formats.append({
                 'url': format_url,
-                'format_id': u'SMIL_' + m.group('cbr'),
+                'format_id': 'SMIL_' + m.group('cbr'),
                 'vcodec': m.group('vcodec'),
                 'acodec': m.group('acodec'),
                 'vbr': int(m.group('vbr')),
@@ -101,26 +103,25 @@ class VevoIE(InfoExtractor):
         video_id = mobj.group('id')
 
         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
-        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-        video_info = json.loads(info_json)['video']
+        video_info = self._download_json(json_url, video_id)['video']
 
         formats = self._formats_from_json(video_info)
         try:
             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
                 self._SMIL_BASE_URL, video_id, video_id.lower())
             smil_xml = self._download_webpage(smil_url, video_id,
-                                              u'Downloading SMIL info')
+                                              'Downloading SMIL info')
             formats.extend(self._formats_from_smil(smil_xml))
         except ExtractorError as ee:
             if not isinstance(ee.cause, compat_HTTPError):
                 raise
             self._downloader.report_warning(
-                u'Cannot download SMIL information, falling back to JSON ..')
+                'Cannot download SMIL information, falling back to JSON ..')
 
         timestamp_ms = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
-        info = {
+        return {
             'id': video_id,
             'title': video_info['title'],
             'formats': formats,
@@ -129,5 +130,3 @@ class VevoIE(InfoExtractor):
             'uploader': video_info['mainArtists'][0]['artistName'],
             'duration': video_info['duration'],
         }
-
-        return info