From: Jaime Marquínez Ferrándiz Date: Fri, 13 Sep 2013 17:55:49 +0000 (+0200) Subject: Merge pull request #1413 from tewe/master X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=74ac9bdd82b8a625ea9782251258ab7da1463877;hp=bfd5c93af9f9eee938c628f19c997f999f21c74e;p=youtube-dl.git Merge pull request #1413 from tewe/master Add Ustream channel support --- diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 6e3595366..b390c7e2e 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -32,9 +32,9 @@ tests = [ # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), - # 82 - vflZK4ZYR 2013/08/23 + # 82 - vflGNjMhJ 2013/09/12 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", - "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), + ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"), # 81 - vflLC8JvQ 2013/07/25 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), diff --git a/test/test_playlists.py b/test/test_playlists.py index 65de3a55c..4a2e00b01 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE +from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE from youtube_dl.utils import * from helper import FakeYDL @@ -34,5 +34,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], u'Vimeo Tributes') self.assertTrue(len(result['entries']) > 24) + def test_ustream_channel(self): + dl = FakeYDL() + ie =
UstreamChannelIE(dl) + result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'5124905') + self.assertTrue(len(result['entries']) >= 11) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b289bd9e2..c2f992b8e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -492,6 +492,8 @@ class YoutubeDL(object): self.report_writedescription(descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) + except (KeyError, TypeError): + self.report_warning(u'There\'s no description to write.') except (OSError, IOError): self.report_error(u'Cannot write description file ' + descfn) return diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a7cddef73..06f9542d2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .kankan import KankanIE +from .kickstarter import KickStarterIE from .keek import KeekIE from .liveleak import LiveLeakIE from .livestream import LivestreamIE diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 1f02519a0..1db9b24cf 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import re import xml.etree.ElementTree @@ -5,24 +6,29 @@ from .common import InfoExtractor from ..utils import unified_strdate class CanalplusIE(InfoExtractor): - _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P\d+)' + _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P.*)|player\.canalplus\.fr/#/(?P\d+))' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' IE_NAME = u'canalplus.fr' _TEST = { - u'url': 
u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861', - u'file': u'889861.flv', - u'md5': u'590a888158b5f0d6832f84001fbf3e99', + u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', + u'file': u'922470.flv', u'info_dict': { - u'title': u'Le Petit Journal 20/06/13 - La guerre des drone', - u'upload_date': u'20130620', + u'title': u'Zapping - 26/08/13', + u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', + u'upload_date': u'20130826', + }, + u'params': { + u'skip_download': True, }, - u'skip': u'Requires rtmpdump' } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + if video_id is None: + webpage = self._download_webpage(url, mobj.group('path')) + video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') info_url = self._VIDEO_INFO_TEMPLATE % video_id info_page = self._download_webpage(info_url,video_id, u'Downloading video info') @@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor): 'ext': 'flv', 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), 'thumbnail': media.find('IMAGES/GRAND').text, + 'description': infos.find('DESCRIPTION').text, + 'view_count': int(infos.find('NB_VUES').text), } diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 7585b7061..cd3bbe65f 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor): u"file": u"6410818.mp4", u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", u"info_dict": { - u"title": u"Arma III - Community Guide: SITREP I", + u"title": u"Arma 3 - Community Guide: SITREP I", u"upload_date": u"20130627", } } diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py new file mode 100644 index 000000000..50bc883ef --- /dev/null +++ b/youtube_dl/extractor/kickstarter.py @@ -0,0 +1,37 @@ +import re + +from 
.common import InfoExtractor + + +class KickStarterIE(InfoExtractor): + _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*' + _TEST = { + u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", + u"file": u"1404461844.mp4", + u"md5": u"c81addca81327ffa66c642b5d8b08cab", + u"info_dict": { + u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + webpage_src = self._download_webpage(url, video_id) + + video_url = self._search_regex(r'data-video="(.*?)">', + webpage_src, u'video URL') + if 'mp4' in video_url: + ext = 'mp4' + else: + ext = 'flv' + video_title = self._html_search_regex(r"<title>(.*?)</title>", + webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() + + results = [{ + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': ext, + }] + return results diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 16cdcc765..f69b27d44 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,9 +1,11 @@ -from HTMLParser import HTMLParser import json import re -from urlparse import urljoin from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_html_parser, +) class UstreamIE(InfoExtractor): @@ -49,7 +51,7 @@ class UstreamIE(InfoExtractor): # More robust than regular expressions -class ChannelParser(HTMLParser): +class ChannelParser(compat_html_parser.HTMLParser): """ """ @@ -65,13 +67,13 @@ class ChannelParser(HTMLParser): if value.isdigit(): self.channel_id = value -class SocialstreamParser(HTMLParser): +class SocialstreamParser(compat_html_parser.HTMLParser): """
  • """ def __init__(self): - HTMLParser.__init__(self) + compat_html_parser.HTMLParser.__init__(self) self.content_ids = [] def handle_starttag(self, tag, attrs): @@ -88,8 +90,6 @@ class UstreamChannelIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) slug = m.group('slug') - # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines, - # so if we got this far it's probably percent encoded and we needn't worry. p = ChannelParser() p.feed(self._download_webpage(url, slug)) @@ -100,16 +100,12 @@ class UstreamChannelIE(InfoExtractor): BASE = 'http://www.ustream.tv' next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id while next_url: - reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id)) + reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id)) p.feed(reply['data']) next_url = reply['nextUrl'] p.close() video_ids = p.content_ids - # From YoutubeChannelIE - - self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids))) - urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids] url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls] - return [self.playlist_result(url_entries, channel_id)] + return self.playlist_result(url_entries, channel_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2e0d70eaf..f49665925 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: - return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] + return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + 
s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: