If you do not have curl, you can alternatively use a recent wget:
- sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl
+ sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+x /usr/local/bin/youtube-dl
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
else:
msg = note + ', ' + msg
self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+    """Fail the test case ``self`` unless ``got >= expected``.
+
+    Module-level helper that takes the test case as its first argument and
+    reports the failure through ``self.assertTrue``.  When ``msg`` is None,
+    a message naming both values is generated.
+    """
+    if got >= expected:
+        return
+    failure_msg = msg
+    if failure_msg is None:
+        failure_msg = '%r not greater than or equal to %r' % (got, expected)
+    self.assertTrue(got >= expected, failure_msg)
--- /dev/null
+// input: [["a", "b", "c", "d"]]
+// output: ["c", "b", "a", "d"]
+
+package {
+// Fixture class: main() runs f() on the input array with num = 2.
+public class ArrayAccess {
+ public static function main(ar:Array):Array {
+ var aa:ArrayAccess = new ArrayAccess();
+ return aa.f(ar, 2);
+ }
+
+ // Exchanges ar[0] with the element at index num and returns the same array.
+ // Note: the read index is wrapped with % ar.length, the write index is not.
+ private function f(ar:Array, num:Number):Array{
+ var x:String = ar[0];
+ var y:String = ar[num % ar.length];
+ ar[0] = y;
+ ar[num] = x;
+ return ar;
+ }
+}
+}
--- /dev/null
+// input: []
+// output: 121
+
+package {
+// Fixture class: main() calls a public method on an instance of another class.
+public class ClassCall {
+ public static function main():int{
+ var f:OtherClass = new OtherClass();
+ return f.func(100,20);
+ }
+}
+}
+
+// Helper class declared outside the package block.
+class OtherClass {
+ // Returns x + y + 1.
+ public function func(x: int, y: int):int {
+ return x+y+1;
+ }
+}
--- /dev/null
+// input: []
+// output: 0
+
+package {
+// Fixture class: main() instantiates Foo, ignores the instance and returns 0.
+public class ClassConstruction {
+ public static function main():int{
+ var f:Foo = new Foo();
+ return 0;
+ }
+}
+}
+
+// Empty class, used only as a construction target by main().
+class Foo {
+
+}
--- /dev/null
+// input: [1, 2]
+// output: 3
+
+package {
+// Fixture class: main() routes a + b through three local variables.
+public class LocalVars {
+ public static function main(a:int, b:int):int{
+ // c holds a + 2b, d subtracts b again, e is a plain copy of d.
+ var c:int = a + b + b;
+ var d:int = c - b;
+ var e:int = d;
+ return e;
+ }
+}
+}
--- /dev/null
+// input: []
+// output: 9
+
+package {
+// Fixture class: main() calls a public method that delegates to a private one.
+public class PrivateCall {
+ public static function main():int{
+ var f:OtherClass = new OtherClass();
+ return f.func();
+ }
+}
+}
+
+// Helper class declared outside the package block.
+class OtherClass {
+ // Private method; only reached through func().
+ private function pf():int {
+ return 9;
+ }
+
+ // Returns the result of the private pf() called on this instance.
+ public function func():int {
+ return this.pf();
+ }
+}
--- /dev/null
+// input: [1]
+// output: 1
+
+package {
+// Fixture class: main() stores its argument in a static variable and reads it back.
+public class StaticAssignment {
+ public static var v:int;
+
+ public static function main(a:int):int{
+ v = a;
+ return v;
+ }
+}
+}
--- /dev/null
+// input: []
+// output: 1
+
+package {
+// Fixture class: main() branches on a static variable that this file never assigns.
+public class StaticRetrieval {
+ // Declared without an initializer.
+ public static var v:int;
+
+ // Returns 0 when v is truthy and 1 otherwise.
+ public static function main():int{
+ if (v) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+}
+}
from test.helper import (
assertRegexpMatches,
+ assertGreaterEqual,
expect_info_dict,
FakeYDL,
)
ie = DailymotionUserIE(dl)
result = ie.extract('https://www.dailymotion.com/user/nqtv')
self.assertIsPlaylist(result)
+ assertGreaterEqual(self, len(result['entries']), 100)
self.assertEqual(result['title'], 'Rémi Gaillard')
- self.assertTrue(len(result['entries']) >= 100)
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result)
- self.assertTrue(len(result['entries']) >= 47)
+ assertGreaterEqual(self, len(result['entries']), 47)
def test_ustream_channel(self):
dl = FakeYDL()
result = ie.extract('http://www.ustream.tv/channel/channeljapan')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10874166')
- self.assertTrue(len(result['entries']) >= 54)
+ assertGreaterEqual(self, len(result['entries']), 54)
def test_soundcloud_set(self):
dl = FakeYDL()
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'The Royal Concept EP')
- self.assertTrue(len(result['entries']) >= 6)
+ assertGreaterEqual(self, len(result['entries']), 6)
def test_soundcloud_user(self):
dl = FakeYDL()
result = ie.extract('https://soundcloud.com/the-concept-band')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865')
- self.assertTrue(len(result['entries']) >= 12)
+ assertGreaterEqual(self, len(result['entries']), 12)
def test_soundcloud_likes(self):
dl = FakeYDL()
result = ie.extract('https://soundcloud.com/the-concept-band/likes')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865')
- self.assertTrue(len(result['entries']) >= 1)
+ assertGreaterEqual(self, len(result['entries']), 1)
def test_soundcloud_playlist(self):
dl = FakeYDL()
result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'TEDCity2.0 (English)')
- self.assertTrue(len(result['entries']) >= 4)
+ assertGreaterEqual(self, len(result['entries']), 4)
def test_livestreamoriginal_folder(self):
dl = FakeYDL()
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
- self.assertTrue(len(result['entries']) >= 28)
+ assertGreaterEqual(self, len(result['entries']), 28)
def test_nhl_videocenter(self):
dl = FakeYDL()
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'pixelversity')
- self.assertTrue(len(result['entries']) >= 60)
+ assertGreaterEqual(self, len(result['entries']), 60)
def test_bandcamp_album(self):
dl = FakeYDL()
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nightmare Night EP')
- self.assertTrue(len(result['entries']) >= 4)
+ assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_community(self):
dl = FakeYDL()
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'kommuna')
self.assertEqual(result['title'], 'КПРФ')
- self.assertTrue(len(result['entries']) >= 4)
+ assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_user(self):
dl = FakeYDL()
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'inspector')
self.assertEqual(result['title'], 'Inspector')
- self.assertTrue(len(result['entries']) >= 9)
+ assertGreaterEqual(self, len(result['entries']), 9)
def test_AcademicEarthCourse(self):
dl = FakeYDL()
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
- self.assertTrue(len(result['entries']) >= 24)
+ assertGreaterEqual(self, len(result['entries']), 24)
def test_ivi_compilation_season(self):
dl = FakeYDL()
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
- self.assertTrue(len(result['entries']) >= 12)
+ assertGreaterEqual(self, len(result['entries']), 12)
def test_imdb_list(self):
dl = FakeYDL()
self.assertEqual(result['id'], 'cryptography')
self.assertEqual(result['title'], 'Journey into cryptography')
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
- self.assertTrue(len(result['entries']) >= 3)
+ assertGreaterEqual(self, len(result['entries']), 3)
def test_EveryonesMixtape(self):
dl = FakeYDL()
result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1800')
- self.assertTrue(len(result['entries']) >= 68)
+ assertGreaterEqual(self, len(result['entries']), 68)
def test_rutube_person(self):
dl = FakeYDL()
result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '313878')
- self.assertTrue(len(result['entries']) >= 37)
+ assertGreaterEqual(self, len(result['entries']), 37)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10')
self.assertEqual(result['title'], 'Who are the hackers?')
- self.assertTrue(len(result['entries']) >= 6)
+ assertGreaterEqual(self, len(result['entries']), 6)
def test_toypics_user(self):
dl = FakeYDL()
result = ie.extract('http://videos.toypics.net/Mikey')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'Mikey')
- self.assertTrue(len(result['entries']) >= 17)
+ assertGreaterEqual(self, len(result['entries']), 17)
def test_xtube_user(self):
dl = FakeYDL()
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'greenshowers')
- self.assertTrue(len(result['entries']) >= 155)
+ assertGreaterEqual(self, len(result['entries']), 155)
def test_InstagramUser(self):
dl = FakeYDL()
result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche')
- self.assertTrue(len(result['entries']) >= 2)
+ assertGreaterEqual(self, len(result['entries']), 2)
test_video = next(
e for e in result['entries']
if e['id'] == '614605558512799803_462752227')
self.assertEqual(result['id'], '152147')
self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News')
- self.assertTrue(len(result['entries']) >= 10)
+ assertGreaterEqual(self, len(result['entries']), 10)
def test_TeacherTubeUser(self):
dl = FakeYDL()
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2')
- self.assertTrue(len(result['entries']) >= 179)
+ assertGreaterEqual(self, len(result['entries']), 179)
if __name__ == '__main__':
unittest.main()
--- /dev/null
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import errno
+import io
+import json
+import re
+import subprocess
+
+from youtube_dl.swfinterp import SWFInterpreter
+
+
+TEST_DIR = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), 'swftests')
+
+
+class TestSWFInterpreter(unittest.TestCase):
+ # Intentionally empty: the test_swf_* methods are attached with setattr()
+ # by _make_testfunc().
+ pass
+
+
+def _make_testfunc(testfile):
+    """Attach a test for one ActionScript file to TestSWFInterpreter.
+
+    File names that do not end in ``.as`` are ignored.  The generated test
+    recompiles the file with mxmlc when the .swf is missing or older than
+    the source, runs the ``main`` function of the class named after the
+    file through SWFInterpreter and compares the result with the value on
+    the ``// output:`` line of the source.
+    """
+    name_match = re.match(r'^(.*)\.(as)$', testfile)
+    if name_match is None:
+        return
+    test_id = name_match.group(1)
+
+    def test_func(self):
+        as_file = os.path.join(TEST_DIR, testfile)
+        swf_file = os.path.join(TEST_DIR, test_id + '.swf')
+        swf_is_stale = (
+            (not os.path.exists(swf_file))
+            or os.path.getmtime(swf_file) < os.path.getmtime(as_file))
+        if swf_is_stale:
+            # Recompile
+            try:
+                subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
+            except OSError as err:
+                if err.errno != errno.ENOENT:
+                    raise
+                # ENOENT: the mxmlc executable could not be started
+                print('mxmlc not found! Skipping test.')
+                return
+
+        with open(swf_file, 'rb') as swf_f:
+            swf_content = swf_f.read()
+        swfi = SWFInterpreter(swf_content)
+
+        with io.open(as_file, 'r', encoding='utf-8') as as_f:
+            as_content = as_f.read()
+
+        def _read_spec(key):
+            # Specs are JSON values on "// <key>: <json>" comment lines
+            pattern = r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key)
+            spec_match = re.search(pattern, as_content)
+            if spec_match is None:
+                raise ValueError('Cannot find %s in %s' % (key, testfile))
+            return json.loads(spec_match.group(1))
+
+        input_args = _read_spec('input')
+        output = _read_spec('output')
+
+        swf_class = swfi.extract_class(test_id)
+        func = swfi.extract_function(swf_class, 'main')
+        self.assertEqual(func(input_args), output)
+
+    test_name = str('test_swf_' + test_id)
+    test_func.__name__ = test_name
+    setattr(TestSWFInterpreter, test_name, test_func)
+
+
+# Register one generated test per entry of TEST_DIR; _make_testfunc() ignores
+# names that do not end in ".as".
+for testfile in os.listdir(TEST_DIR):
+ _make_testfunc(testfile)
+
+if __name__ == '__main__':
+ unittest.main()
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
),
+ (
+ u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
+ u'swf',
+ 86,
+ u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
+ ),
+ (
+ u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
+ u'swf',
+ u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
+ u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
+ ),
]
def make_tfunc(url, stype, sig_input, expected_sig):
- basename = url.rpartition('/')[2]
- m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
- assert m, '%r should follow URL format' % basename
+ m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
+ assert m, '%r should follow URL format' % url
test_id = m.group(1)
def test_func(self):
+ basename = 'player-%s.%s' % (test_id, stype)
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
if res:
res += ', '
res += format_bytes(fdict['filesize'])
+ elif fdict.get('filesize_approx') is not None:
+ if res:
+ res += ', '
+ res += '~' + format_bytes(fdict['filesize_approx'])
return res
def list_formats(self, info_dict):
'Adam Malcontenti-Wilson',
'Tobias Bell',
'Naglis Jonaitis',
+ 'Charles Chen',
+ 'Hassaan Ali',
)
__license__ = 'Public Domain'
import codecs
import io
-import locale
import optparse
import os
import random
-import re
import shlex
import sys
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
+from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
+from .cracked import CrackedIE
from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE
DailymotionUserIE,
)
from .daum import DaumIE
+from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE
+from .mlb import MLBIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
RutubePersonIE,
)
from .rutv import RUTVIE
+from .sapo import SapoIE
from .savefrom import SaveFromIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
SmotriUserIE,
SmotriBroadcastIE,
)
+from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE
from .soundcloud import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
+
from .zdf import ZDFIE
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+class AdultSwimIE(InfoExtractor):
+    """Extractor for video.adultswim.com episode pages.
+
+    An episode is returned as a playlist with one entry per segment listed
+    in the episode index.
+    """
+    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
+    _TEST = {
+        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+        'playlist': [
+            {
+                'md5': '4da359ec73b58df4575cd01a610ba5dc',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7f02ca02f5',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7f4bd102f6',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'b92409635540304280b4b6c36bd14a0a',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7fa73c02f7',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'e8818891d60e47b29cd89d7b0278156d',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7fc8ba02f8',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            }
+        ]
+    }
+
+    # File extension, keyed by the "bitrate" attribute of a <file> element
+    _video_extensions = {
+        '3500': 'flv',
+        '640': 'mp4',
+        '150': 'mp4',
+        'ipad': 'm3u8',
+        'iphone': 'm3u8'
+    }
+    # (width, height), keyed by the "bitrate" attribute of a <file> element
+    _video_dimensions = {
+        '3500': (1280, 720),
+        '640': (480, 270),
+        '150': (320, 180)
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist dict with one entry per segment of the episode at ``url``."""
+        mobj = re.match(self._VALID_URL, url)
+        video_path = mobj.group('path')
+
+        webpage = self._download_webpage(url, video_path)
+        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
+        title = self._og_search_title(webpage)
+
+        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
+        index_doc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
+
+        episode_el = index_doc.find('.//episode')
+        show_title = episode_el.attrib.get('collectionTitle')
+        episode_title = episode_el.attrib.get('title')
+        thumbnail = episode_el.attrib.get('thumbnailUrl')
+        description = episode_el.find('./description').text.strip()
+
+        entries = []
+        segment_els = episode_el.findall('./segments/segment')
+
+        for part_num, segment_el in enumerate(segment_els):
+            segment_id = segment_el.attrib.get('id')
+            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
+            # Kept separate from the episode-level ``thumbnail`` so that the
+            # playlist result does not end up with the last segment's image.
+            segment_thumbnail = segment_el.attrib.get('thumbnailUrl')
+            # NOTE(review): passed through as the raw attribute string -
+            # confirm its unit/format before converting it to a number.
+            duration = segment_el.attrib.get('duration')
+
+            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
+            segment_doc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
+
+            formats = []
+            file_els = segment_doc.findall('.//files/file')
+
+            for file_el in file_els:
+                bitrate = file_el.attrib.get('bitrate')
+                file_type = file_el.attrib.get('type')
+                width, height = self._video_dimensions.get(bitrate, (None, None))
+                formats.append({
+                    'format_id': '%s-%s' % (bitrate, file_type),
+                    'url': file_el.text,
+                    'ext': self._video_extensions.get(bitrate, 'mp4'),
+                    # The bitrate may be missing or not a number (for example: 'iphone')
+                    'tbr': int(bitrate) if bitrate and bitrate.isdigit() else None,
+                    'height': height,
+                    'width': width
+                })
+
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': segment_id,
+                'title': segment_title,
+                'formats': formats,
+                'uploader': show_title,
+                'thumbnail': segment_thumbnail,
+                'duration': duration,
+                'description': description
+            })
+
+        return {
+            '_type': 'playlist',
+            'id': episode_id,
+            'display_id': video_path,
+            'entries': entries,
+            'title': '%s %s' % (show_title, episode_title),
+            'description': description,
+            'thumbnail': thumbnail
+        }
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
- 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+ 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
- 'description': 'md5:e74a4dc750894bac300ece46c7036490',
+ 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
'thumbnail': 're:http://.*\.jpg',
},
}]
from ..utils import (
determine_ext,
ExtractorError,
+ qualities,
)
class ARDIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+ _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
- _TEST = {
- 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
- 'file': '19288786.mp4',
- 'md5': '515bf47ce209fb3f5a61b7aad364634c',
+ _TESTS = [{
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'file': '22429276.mp4',
+ 'md5': '469751912f1de0816a9fc9df8336476c',
'info_dict': {
- 'title': 'Edward Snowden im Interview - Held oder Verräter?',
- 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
- 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
+ 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
+ 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
},
'skip': 'Blocked outside of Germany',
- }
+ }, {
+ 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
+ 'info_dict': {
+ 'id': '22490580',
+ 'ext': 'mp4',
+ 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
+ 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
+ },
+ 'skip': 'Blocked outside of Germany',
+ }]
def _real_extract(self, url):
# determine video id from url
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(
- 'dcterms.abstract', webpage, 'description')
- thumbnail = self._og_search_thumbnail(webpage)
-
-
- media_info = self._download_json(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
- # The second element of the _mediaArray contains the standard http urls
- streams = media_info['_mediaArray'][1]['_mediaStreamArray']
- if not streams:
- if '"fsk"' in webpage:
- raise ExtractorError('This video is only available after 20:00')
-
- formats = []
-
- for s in streams:
- if type(s['_stream']) == list:
- for index, url in enumerate(s['_stream'][::-1]):
- quality = s['_quality'] + index
- formats.append({
- 'quality': quality,
- 'url': url,
- 'format_id': '%s-%s' % (determine_ext(url), quality)
+ 'dcterms.abstract', webpage, 'description', default=None)
+ if description is None:
+ description = self._html_search_meta(
+ 'description', webpage, 'meta description')
+
+ # Thumbnail is sometimes not present.
+ # It is in the mobile version, but that seems to use a different URL
+ # structure altogether.
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ media_streams = re.findall(r'''(?x)
+ mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+ "([^"]+)"''', webpage)
+
+ if media_streams:
+ QUALITIES = qualities(['lo', 'hi', 'hq'])
+ formats = []
+ for furl in set(media_streams):
+ if furl.endswith('.f4m'):
+ fid = 'f4m'
+ else:
+ fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+ fid = fid_m.group(1) if fid_m else None
+ formats.append({
+ 'quality': QUALITIES(fid),
+ 'format_id': fid,
+ 'url': furl,
+ })
+ else: # request JSON file
+ media_info = self._download_json(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+ # The second element of the _mediaArray contains the standard http urls
+ streams = media_info['_mediaArray'][1]['_mediaStreamArray']
+ if not streams:
+ if '"fsk"' in webpage:
+ raise ExtractorError('This video is only available after 20:00')
+
+ formats = []
+ for s in streams:
+ if type(s['_stream']) == list:
+ for index, url in enumerate(s['_stream'][::-1]):
+ quality = s['_quality'] + index
+ formats.append({
+ 'quality': quality,
+ 'url': url,
+ 'format_id': '%s-%s' % (determine_ext(url), quality)
})
- continue
+ continue
- format = {
- 'quality': s['_quality'],
- 'url': s['_stream'],
- }
+ format = {
+ 'quality': s['_quality'],
+ 'url': s['_stream'],
+ }
- format['format_id'] = '%s-%s' % (
- determine_ext(format['url']), format['quality'])
+ format['format_id'] = '%s-%s' % (
+ determine_ext(format['url']), format['quality'])
- formats.append(format)
+ formats.append(format)
self._sort_formats(formats)
'id': '85523671',
'ext': 'mp4',
'title': 'The Sunday Times - Icons',
- 'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
+ 'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
'uploader': 'Us',
'uploader_id': 'usfilms',
'upload_date': '20140131'
raise ExtractorError('Cannot find video data')
video_id = vdata['id']
- title = vdata['headline']
+ title = vdata.get('headline')
+ if title is None:
+ title = vdata.get('title')
+ if title is None:
+ raise ExtractorError('Cannot find title!')
description = vdata.get('dek')
thumbnail = vdata.get('image', {}).get('path')
author = vdata.get('author')
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
- (video-clips|episodes|cc-studios|video-collections)
+ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+ (video-clips|episodes|cc-studios|video-collections|full-episodes)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TEST = {
- 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+ 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
* vcodec Name of the video codec in use
* container Name of the container format
* filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
- (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+ (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal, **kwargs)
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class CrackedIE(InfoExtractor):
+    """Extractor for cracked.com video pages (``video_<id>_<slug>.html``)."""
+    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
+    _TEST = {
+        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
+        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
+        'info_dict': {
+            'id': '19006',
+            'ext': 'mp4',
+            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
+            'description': 'md5:3b909e752661db86007d10e5ec2df769',
+            'timestamp': 1405659600,
+            'upload_date': '20140718',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the page at ``url`` and return its info dict."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
+            webpage, 'video URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        published = self._html_search_regex(
+            r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
+        # The last six characters of the datetime attribute are cut off
+        # before parsing; a missing value is passed through unchanged.
+        timestamp = parse_iso8601(published[:-6]) if published else published
+
+        views_text = self._html_search_regex(
+            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>',
+            webpage, 'view count', fatal=False)
+        view_count = str_to_int(views_text)
+        comments_text = self._html_search_regex(
+            r'<span id="commentCounts">([\d,\.]+)</span>',
+            webpage, 'comment count', fatal=False)
+        comment_count = str_to_int(comments_text)
+
+        # Dimensions are only known when the file name ends in _<W>X<H>.mp4
+        width = height = None
+        size_match = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
+        if size_match:
+            width = int(size_match.group('width'))
+            height = int(size_match.group('height'))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'height': height,
+            'width': width,
+        }
\ No newline at end of file
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DFBIE(InfoExtractor):
+    """Extractor for tv.dfb.de video pages."""
+    IE_NAME = 'tv.dfb.de'
+    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+        # The md5 is different each time
+        'info_dict': {
+            'id': '9070',
+            'ext': 'flv',
+            'title': 'Highlights des Empfangs in Berlin',
+            'upload_date': '20140716',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        player_url = 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id
+        video_info = self._download_xml(player_url, video_id).find('video')
+
+        # The document behind <url> carries a <token> element whose "url" and
+        # "auth" attributes are combined into the manifest URL.
+        f4m_info = self._download_xml(video_info.find('url').text, video_id)
+        token = f4m_info.find('token').attrib
+        manifest_url = '%s?hdnea=%s&hdcore=3.2.0' % (token['url'], token['auth'])
+
+        return {
+            'id': video_id,
+            'title': video_info.find('title').text,
+            'url': manifest_url,
+            'ext': 'flv',
+            'thumbnail': self._og_search_thumbnail(webpage),
+            # The dot-separated parts of <time_date> are joined in reverse order
+            'upload_date': ''.join(reversed(video_info.find('time_date').text.split('.'))),
+        }
import re
from .common import InfoExtractor
+from ..utils import compat_urllib_parse
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
_TEST = {
- 'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
- 'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
+ 'md5': '8a3d905427a6951ccb9eb292f154530b',
'info_dict': {
- 'id': '0qr9sai2veej4f8',
+ 'id': 'nelirfsxnmcfbfh',
'ext': 'mp4',
- 'title': 'THE_DOCTOR_GAMES'
+ 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- title = os.path.splitext(mobj.group('title'))[0]
+ fn = compat_urllib_parse.unquote(mobj.group('title'))
+ title = os.path.splitext(fn)[0]
video_url = url + '?dl=1'
return {
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
- determine_ext,
)
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr'
- _VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
+ _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = 'culturebox.francetvinfo.fr'
- _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
+ _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
'id': 'e402820827',
'ext': 'mp4',
'title': 'Please Use This Song (Jon Lajoie)',
- 'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
+ 'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$',
},
}]
elif default_search == 'error':
raise ExtractorError(
('%r is not a valid URL. '
- 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
+ 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
assert ':' in default_search
}
def _extract_video_info(self, video_data):
- video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
+ video_url = (
+ video_data.get('progressive_url_hd') or
+ video_data.get('progressive_url')
+ )
return {
'id': compat_str(video_data['id']),
'url': video_url,
- 'ext': 'mp4',
'title': video_data['caption'],
'thumbnail': video_data['thumbnail_url'],
'upload_date': video_data['updated_at'].replace('-', '')[:8],
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data'] if video_data['type'] == 'video']
+ for video_data in info['feed']['data']
+ if video_data['type'] == 'video']
return self.playlist_result(videos, info['id'], info['full_name'])
else:
og_video = self._og_search_video_url(webpage, 'player url')
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ find_xpath_attr,
+)
+
+
+class MLBIE(InfoExtractor):
+    # Extractor for video pages on m.mlb.com. The id captured from the page
+    # URL is used to fetch a per-video "detail" XML document, from which all
+    # metadata and the media URLs are read.
+    _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _TESTS = [
+        {
+            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
+            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+            'info_dict': {
+                'id': '34496663',
+                'ext': 'mp4',
+                'title': 'Stanton prepares for Derby',
+                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+                'duration': 46,
+                'timestamp': 1405105800,
+                'upload_date': '20140711',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
+            'md5': '0e6e73d509321e142409b695eadd541f',
+            'info_dict': {
+                'id': '34578115',
+                'ext': 'mp4',
+                'title': 'Cespedes repeats as Derby champ',
+                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+                'duration': 488,
+                'timestamp': 1405399936,
+                'upload_date': '20140715',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
+            'md5': 'b8fd237347b844365d74ea61d4245967',
+            'info_dict': {
+                'id': '34577915',
+                'ext': 'mp4',
+                'title': 'Bautista on Home Run Derby',
+                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+                'duration': 52,
+                'timestamp': 1405390722,
+                'upload_date': '20140715',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        # Return the info dict (id, title, description, duration, timestamp,
+        # formats, thumbnail) for the video addressed by url.
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        # The detail document path is made of the last three characters of
+        # the id (one path segment each) followed by the full id.
+        detail_url = (
+            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
+            % (video_id[-3], video_id[-2], video_id[-1], video_id))
+        detail = self._download_xml(detail_url, video_id)
+
+        info = {
+            'id': video_id,
+            'title': detail.find('./headline').text,
+            'description': detail.find('./big-blurb').text,
+            'duration': parse_duration(detail.find('./duration').text),
+            # The last five characters of the date attribute are cut off
+            # before parsing (presumably a UTC offset -- TODO confirm).
+            'timestamp': parse_iso8601(detail.attrib['date'][:-5]),
+        }
+
+        # Thumbnail URL is the text of the thumbnailScenario selected by
+        # type="45".
+        thumbnail = find_xpath_attr(
+            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text
+
+        formats = []
+        for url_el in detail.findall('./url'):
+            scenario = url_el.attrib['playback_scenario']
+            entry = {
+                'url': url_el.text,
+                'format_id': scenario,
+            }
+            # Scenario names containing "<digits>K_<digits>X<digits>" also
+            # supply the bitrate and the frame size.
+            spec = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', scenario)
+            if spec is not None:
+                entry['vbr'] = int(spec.group('vbr')) * 1000
+                entry['width'] = int(spec.group('width'))
+                entry['height'] = int(spec.group('height'))
+            formats.append(entry)
+
+        self._sort_formats(formats)
+
+        info['formats'] = formats
+        info['thumbnail'] = thumbnail
+        return info
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id,
# We have to remove the javascript callback
- transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+ transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
)
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title')
- video_thumbnail = self._html_search_regex(
- r'playerInnerHTML.+?<img\s+src="(.+?)"',
- webpage, u'thumbnail', fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
# No self-labeling, but they describe themselves as
# "Home of Videos Porno"
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex(
- r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
+ r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url')
_TEST = {
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
- 'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
+ 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'info_dict': {
'id': '2491869',
'ext': 'mp4',
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+    # Extractor for SAPO video pages. All metadata comes from the per-video
+    # rss2 feed on rd3.videos.sapo.pt; only its first ./channel/item element
+    # is used.
+    IE_DESC = 'SAPO Vídeos'
+    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+    _TESTS = [
+        {
+            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+            'md5': '79ee523f6ecb9233ac25075dee0eda83',
+            'note': 'SD video',
+            'info_dict': {
+                'id': 'UBz95kOtiWYUMTA5Ghfi',
+                'ext': 'mp4',
+                'title': 'Benfica - Marcas na Hitória',
+                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+                'duration': 264,
+                'uploader': 'tiago_1988',
+                'upload_date': '20080229',
+                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+            },
+        },
+        {
+            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+            'md5': '90a2f283cfb49193fe06e861613a72aa',
+            'note': 'HD video',
+            'info_dict': {
+                'id': 'IyusNAZ791ZdoCY5H5IF',
+                'ext': 'mp4',
+                'title': 'Codebits VII - Report',
+                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+                'duration': 144,
+                'uploader': 'codebits',
+                'upload_date': '20140427',
+                'categories': ['codebits', 'codebits2014'],
+            },
+        },
+        {
+            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+            'note': 'v2 video',
+            'info_dict': {
+                'id': 'yLqjzPtbTimsn2wWBKHz',
+                'ext': 'mp4',
+                'title': 'Hipnose Condicionativa 4',
+                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+                'duration': 692,
+                'uploader': 'sapozen',
+                'upload_date': '20090609',
+                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        # Return the info dict for the video addressed by url, with an 'sd'
+        # format always present and an 'hd' format when the feed flags one.
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        feed = self._download_xml(
+            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id)
+        item = feed.find('./channel/item')
+
+        def text_of(path):
+            # Text content of the child element of item found at path.
+            return item.find(path).text
+
+        title = text_of('./title')
+        description = text_of('./{http://videos.sapo.pt/mrss/}synopse')
+        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+        duration = parse_duration(text_of('./{http://videos.sapo.pt/mrss/}time'))
+        uploader = text_of('./{http://videos.sapo.pt/mrss/}author')
+        upload_date = unified_strdate(text_of('./pubDate'))
+        view_count = int(text_of('./{http://videos.sapo.pt/mrss/}views'))
+        comment_count = int(text_of('./{http://videos.sapo.pt/mrss/}comment_count'))
+
+        # Tags arrive as one whitespace-separated string; an empty or
+        # missing text yields no categories.
+        tags = text_of('./{http://videos.sapo.pt/mrss/}tags')
+        if tags:
+            categories = tags.split()
+        else:
+            categories = []
+
+        is_m18 = text_of('./{http://videos.sapo.pt/mrss/}m18') == 'true'
+        age_limit = 18 if is_m18 else 0
+
+        video_url = text_of('./{http://videos.sapo.pt/mrss/}videoFile')
+        # videoSize is split on 'x'; the first two parts are used as
+        # width and height.
+        video_size = text_of('./{http://videos.sapo.pt/mrss/}videoSize').split('x')
+
+        sd_format = {
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'sd',
+            'width': int(video_size[0]),
+            'height': int(video_size[1]),
+        }
+        formats = [sd_format]
+
+        if text_of('./{http://videos.sapo.pt/mrss/}HD') == 'true':
+            # The HD URL is the SD URL with a trailing /mov/1 replaced by
+            # /mov/39; its dimensions are fixed at 1280x720.
+            hd_format = {
+                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+                'ext': 'mp4',
+                'format_id': 'hd',
+                'width': 1280,
+                'height': 720,
+            }
+            formats.append(hd_format)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
'upload_date': '20120816',
'uploader': 'Howcast',
'uploader_id': 'Howcast',
- 'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
+ 'description': 'md5:727900f130df3dc9a25e2721497c7910',
},
'params': {
'skip_download': True
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ str_to_int,
+ parse_duration,
+)
+
+
+class SnotrIE(InfoExtractor):
+    # Extractor for snotr.com video pages. Title and description come from
+    # the page's Open Graph data; view count, duration and file size are
+    # scraped from the page HTML; the media URL is derived from the id.
+
+    # Scheme is 'https?' so both http:// and https:// URLs match. The former
+    # 'http?' only made the final 'p' optional, which accepted 'htt://' and
+    # rejected 'https://'.
+    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
+    _TESTS = [{
+        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
+        'info_dict': {
+            'id': '13708',
+            'ext': 'flv',
+            'title': 'Drone flying through fireworks!',
+            'duration': 247,
+            'filesize_approx': 98566144,
+            'description': 'A drone flying through Fourth of July Fireworks',
+        }
+    }, {
+        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
+        'info_dict': {
+            'id': '530',
+            'ext': 'flv',
+            'title': 'David Letteman - George W. Bush Top 10',
+            'duration': 126,
+            'filesize_approx': 8912896,
+            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+        }
+    }]
+
+    def _real_extract(self, url):
+        # Return the info dict (id, description, title, url, view_count,
+        # duration, filesize_approx) for the video page at url.
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+
+        description = self._og_search_description(webpage)
+        # The media file is not looked up in the page; its URL is built
+        # directly from the numeric id.
+        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id
+
+        # The three statistics below are searched with fatal=False.
+        view_count = str_to_int(self._html_search_regex(
+            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
+            webpage, 'view count', fatal=False))
+
+        duration = parse_duration(self._html_search_regex(
+            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
+            webpage, 'duration', fatal=False))
+
+        # The page states the size in megabytes; invscale=1024 * 1024 is
+        # passed to scale that figure to bytes.
+        filesize_approx = float_or_none(self._html_search_regex(
+            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
+            webpage, 'filesize', fatal=False), invscale=1024 * 1024)
+
+        return {
+            'id': video_id,
+            'description': description,
+            'title': title,
+            'url': video_url,
+            'view_count': view_count,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+        }
'ext': 'mp4',
'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer',
- 'description': 'md5:6df4fe8dd494ae811869672b0767e025',
+ 'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}
'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$',
},
- }, {
- 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
- 'md5': '66652566900963a3f962333579eeffcf',
- 'info_dict': {
- 'id': '5964',
- 'ext': 'mp4',
- 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
- 'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
- 'thumbnail': 're:http://.*\.jpg',
- },
}]
_FORMATS = {
webpage = self._download_webpage(url, video_id)
- title = self._html_search_meta('title', webpage, 'title')
+ title = self._html_search_meta('title', webpage, 'title', fatal=True)
TITLE_SUFFIX = ' - TeacherTube'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)].strip()
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
- _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
+ _MEDIA_RE = r'''(?sx)
+ class="?sidebar_thumb_time"?>[0-9:]+</div>
+ \s*
+ <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
+ '''
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage))
- pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
+ pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
- webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
- urls.extend(re.findall(self._MEDIA_RE, webpage))
-
- entries = []
- for url in urls:
- entries.append(self.url_result(url, 'TeacherTube'))
+ webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
+ video_urls = re.findall(self._MEDIA_RE, webpage)
+ urls.extend(video_urls)
+ entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
return self.playlist_result(entries, user_id)
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
'info_dict': {
'id': '54469442',
'ext': 'mp4',
- 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software',
+ 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
'uploader': 'The BLN & Business of Software',
'uploader_id': 'theblnbusinessofsoftware',
'duration': 3610,
},
},
{
- 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
- 'md5': '24e83813e832badb0a8d7d1ef9ef0691',
+ 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
+ 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
'info_dict': {
- 'id': 'mdb-463528',
+ 'id': 'mdb-478135',
'ext': 'mp3',
- 'title': 'Süpersong: Soul Bossa Nova',
+ 'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
- 'upload_date': '20140630',
+ 'upload_date': '20140717',
},
},
]
# coding: utf-8
-import collections
import errno
import io
import itertools
import json
import os.path
import re
-import struct
import traceback
-import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
from ..utils import (
compat_chr,
compat_parse_qs,
self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen):
- id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
- player_url)
+ id_m = re.match(
+ r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
+ player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
- if file_contents[1:3] != b'WS':
- raise ExtractorError(
- u'Not an SWF file; header is %r' % file_contents[:3])
- if file_contents[:1] == b'C':
- content = zlib.decompress(file_contents[8:])
- else:
- raise NotImplementedError(u'Unsupported compression format %r' %
- file_contents[:1])
-
- def extract_tags(content):
- pos = 0
- while pos < len(content):
- header16 = struct.unpack('<H', content[pos:pos+2])[0]
- pos += 2
- tag_code = header16 >> 6
- tag_len = header16 & 0x3f
- if tag_len == 0x3f:
- tag_len = struct.unpack('<I', content[pos:pos+4])[0]
- pos += 4
- assert pos+tag_len <= len(content)
- yield (tag_code, content[pos:pos+tag_len])
- pos += tag_len
-
- code_tag = next(tag
- for tag_code, tag in extract_tags(content)
- if tag_code == 82)
- p = code_tag.index(b'\0', 4) + 1
- code_reader = io.BytesIO(code_tag[p:])
-
- # Parse ABC (AVM2 ByteCode)
- def read_int(reader=None):
- if reader is None:
- reader = code_reader
- res = 0
- shift = 0
- for _ in range(5):
- buf = reader.read(1)
- assert len(buf) == 1
- b = struct.unpack('<B', buf)[0]
- res = res | ((b & 0x7f) << shift)
- if b & 0x80 == 0:
- break
- shift += 7
- return res
-
- def u30(reader=None):
- res = read_int(reader)
- assert res & 0xf0000000 == 0
- return res
- u32 = read_int
-
- def s32(reader=None):
- v = read_int(reader)
- if v & 0x80000000 != 0:
- v = - ((v ^ 0xffffffff) + 1)
- return v
-
- def read_string(reader=None):
- if reader is None:
- reader = code_reader
- slen = u30(reader)
- resb = reader.read(slen)
- assert len(resb) == slen
- return resb.decode('utf-8')
-
- def read_bytes(count, reader=None):
- if reader is None:
- reader = code_reader
- resb = reader.read(count)
- assert len(resb) == count
- return resb
-
- def read_byte(reader=None):
- resb = read_bytes(1, reader=reader)
- res = struct.unpack('<B', resb)[0]
- return res
-
- # minor_version + major_version
- read_bytes(2 + 2)
-
- # Constant pool
- int_count = u30()
- for _c in range(1, int_count):
- s32()
- uint_count = u30()
- for _c in range(1, uint_count):
- u32()
- double_count = u30()
- read_bytes((double_count-1) * 8)
- string_count = u30()
- constant_strings = [u'']
- for _c in range(1, string_count):
- s = read_string()
- constant_strings.append(s)
- namespace_count = u30()
- for _c in range(1, namespace_count):
- read_bytes(1) # kind
- u30() # name
- ns_set_count = u30()
- for _c in range(1, ns_set_count):
- count = u30()
- for _c2 in range(count):
- u30()
- multiname_count = u30()
- MULTINAME_SIZES = {
- 0x07: 2, # QName
- 0x0d: 2, # QNameA
- 0x0f: 1, # RTQName
- 0x10: 1, # RTQNameA
- 0x11: 0, # RTQNameL
- 0x12: 0, # RTQNameLA
- 0x09: 2, # Multiname
- 0x0e: 2, # MultinameA
- 0x1b: 1, # MultinameL
- 0x1c: 1, # MultinameLA
- }
- multinames = [u'']
- for _c in range(1, multiname_count):
- kind = u30()
- assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
- if kind == 0x07:
- u30() # namespace_idx
- name_idx = u30()
- multinames.append(constant_strings[name_idx])
- else:
- multinames.append('[MULTINAME kind: %d]' % kind)
- for _c2 in range(MULTINAME_SIZES[kind]):
- u30()
-
- # Methods
- method_count = u30()
- MethodInfo = collections.namedtuple(
- 'MethodInfo',
- ['NEED_ARGUMENTS', 'NEED_REST'])
- method_infos = []
- for method_id in range(method_count):
- param_count = u30()
- u30() # return type
- for _ in range(param_count):
- u30() # param type
- u30() # name index (always 0 for youtube)
- flags = read_byte()
- if flags & 0x08 != 0:
- # Options present
- option_count = u30()
- for c in range(option_count):
- u30() # val
- read_bytes(1) # kind
- if flags & 0x80 != 0:
- # Param names present
- for _ in range(param_count):
- u30() # param name
- mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
- method_infos.append(mi)
-
- # Metadata
- metadata_count = u30()
- for _c in range(metadata_count):
- u30() # name
- item_count = u30()
- for _c2 in range(item_count):
- u30() # key
- u30() # value
-
- def parse_traits_info():
- trait_name_idx = u30()
- kind_full = read_byte()
- kind = kind_full & 0x0f
- attrs = kind_full >> 4
- methods = {}
- if kind in [0x00, 0x06]: # Slot or Const
- u30() # Slot id
- u30() # type_name_idx
- vindex = u30()
- if vindex != 0:
- read_byte() # vkind
- elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
- u30() # disp_id
- method_idx = u30()
- methods[multinames[trait_name_idx]] = method_idx
- elif kind == 0x04: # Class
- u30() # slot_id
- u30() # classi
- elif kind == 0x05: # Function
- u30() # slot_id
- function_idx = u30()
- methods[function_idx] = multinames[trait_name_idx]
- else:
- raise ExtractorError(u'Unsupported trait kind %d' % kind)
-
- if attrs & 0x4 != 0: # Metadata present
- metadata_count = u30()
- for _c3 in range(metadata_count):
- u30() # metadata index
-
- return methods
-
- # Classes
+ swfi = SWFInterpreter(file_contents)
TARGET_CLASSNAME = u'SignatureDecipher'
- searched_idx = multinames.index(TARGET_CLASSNAME)
- searched_class_id = None
- class_count = u30()
- for class_id in range(class_count):
- name_idx = u30()
- if name_idx == searched_idx:
- # We found the class we're looking for!
- searched_class_id = class_id
- u30() # super_name idx
- flags = read_byte()
- if flags & 0x08 != 0: # Protected namespace is present
- u30() # protected_ns_idx
- intrf_count = u30()
- for _c2 in range(intrf_count):
- u30()
- u30() # iinit
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- if searched_class_id is None:
- raise ExtractorError(u'Target class %r not found' %
- TARGET_CLASSNAME)
-
- method_names = {}
- method_idxs = {}
- for class_id in range(class_count):
- u30() # cinit
- trait_count = u30()
- for _c2 in range(trait_count):
- trait_methods = parse_traits_info()
- if class_id == searched_class_id:
- method_names.update(trait_methods.items())
- method_idxs.update(dict(
- (idx, name)
- for name, idx in trait_methods.items()))
-
- # Scripts
- script_count = u30()
- for _c in range(script_count):
- u30() # init
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- # Method bodies
- method_body_count = u30()
- Method = collections.namedtuple('Method', ['code', 'local_count'])
- methods = {}
- for _c in range(method_body_count):
- method_idx = u30()
- u30() # max_stack
- local_count = u30()
- u30() # init_scope_depth
- u30() # max_scope_depth
- code_length = u30()
- code = read_bytes(code_length)
- if method_idx in method_idxs:
- m = Method(code, local_count)
- methods[method_idxs[method_idx]] = m
- exception_count = u30()
- for _c2 in range(exception_count):
- u30() # from
- u30() # to
- u30() # target
- u30() # exc_type
- u30() # var_name
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- assert p + code_reader.tell() == len(code_tag)
- assert len(methods) == len(method_idxs)
-
- method_pyfunctions = {}
-
- def extract_function(func_name):
- if func_name in method_pyfunctions:
- return method_pyfunctions[func_name]
- if func_name not in methods:
- raise ExtractorError(u'Cannot find function %r' % func_name)
- m = methods[func_name]
-
- def resfunc(args):
- registers = ['(this)'] + list(args) + [None] * m.local_count
- stack = []
- coder = io.BytesIO(m.code)
- while True:
- opcode = struct.unpack('!B', coder.read(1))[0]
- if opcode == 36: # pushbyte
- v = struct.unpack('!B', coder.read(1))[0]
- stack.append(v)
- elif opcode == 44: # pushstring
- idx = u30(coder)
- stack.append(constant_strings[idx])
- elif opcode == 48: # pushscope
- # We don't implement the scope register, so we'll just
- # ignore the popped value
- stack.pop()
- elif opcode == 70: # callproperty
- index = u30(coder)
- mname = multinames[index]
- arg_count = u30(coder)
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- if mname == u'split':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- assert isinstance(obj, compat_str)
- if args[0] == u'':
- res = list(obj)
- else:
- res = obj.split(args[0])
- stack.append(res)
- elif mname == u'slice':
- assert len(args) == 1
- assert isinstance(args[0], int)
- assert isinstance(obj, list)
- res = obj[args[0]:]
- stack.append(res)
- elif mname == u'join':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- assert isinstance(obj, list)
- res = args[0].join(obj)
- stack.append(res)
- elif mname in method_pyfunctions:
- stack.append(method_pyfunctions[mname](args))
- else:
- raise NotImplementedError(
- u'Unsupported property %r on %r'
- % (mname, obj))
- elif opcode == 72: # returnvalue
- res = stack.pop()
- return res
- elif opcode == 79: # callpropvoid
- index = u30(coder)
- mname = multinames[index]
- arg_count = u30(coder)
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- if mname == u'reverse':
- assert isinstance(obj, list)
- obj.reverse()
- else:
- raise NotImplementedError(
- u'Unsupported (void) property %r on %r'
- % (mname, obj))
- elif opcode == 93: # findpropstrict
- index = u30(coder)
- mname = multinames[index]
- res = extract_function(mname)
- stack.append(res)
- elif opcode == 97: # setproperty
- index = u30(coder)
- value = stack.pop()
- idx = stack.pop()
- obj = stack.pop()
- assert isinstance(obj, list)
- assert isinstance(idx, int)
- obj[idx] = value
- elif opcode == 98: # getlocal
- index = u30(coder)
- stack.append(registers[index])
- elif opcode == 99: # setlocal
- index = u30(coder)
- value = stack.pop()
- registers[index] = value
- elif opcode == 102: # getproperty
- index = u30(coder)
- pname = multinames[index]
- if pname == u'length':
- obj = stack.pop()
- assert isinstance(obj, list)
- stack.append(len(obj))
- else: # Assume attribute access
- idx = stack.pop()
- assert isinstance(idx, int)
- obj = stack.pop()
- assert isinstance(obj, list)
- stack.append(obj[idx])
- elif opcode == 128: # coerce
- u30(coder)
- elif opcode == 133: # coerce_s
- assert isinstance(stack[-1], (type(None), compat_str))
- elif opcode == 164: # modulo
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 % value2
- stack.append(res)
- elif opcode == 208: # getlocal_0
- stack.append(registers[0])
- elif opcode == 209: # getlocal_1
- stack.append(registers[1])
- elif opcode == 210: # getlocal_2
- stack.append(registers[2])
- elif opcode == 211: # getlocal_3
- stack.append(registers[3])
- elif opcode == 214: # setlocal_2
- registers[2] = stack.pop()
- elif opcode == 215: # setlocal_3
- registers[3] = stack.pop()
- else:
- raise NotImplementedError(
- u'Unsupported opcode %d' % opcode)
-
- method_pyfunctions[func_name] = resfunc
- return resfunc
-
- initial_function = extract_function(u'decipher')
+ searched_class = swfi.extract_class(TARGET_CLASSNAME)
+ initial_function = swfi.extract_function(searched_class, u'decipher')
return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
- data = compat_urllib_parse.urlencode({'video_id': video_id,
- 'el': 'player_embedded',
- 'gl': 'US',
- 'hl': 'en',
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'asv': 3,
- 'sts':'1588',
- })
+ data = compat_urllib_parse.urlencode({
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ 'sts':'16268',
+ })
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
+
+ if not age_gate:
+ jsplayer_url_json = self._search_regex(
+ r'"assets":.+?"js":\s*("[^"]+")',
+ video_webpage, u'JS player URL')
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, u'age gate player URL')
+ player_url = json.loads(player_url_json)
+
if self._downloader.params.get('verbose'):
- if age_gate:
- if player_url is None:
- player_version = 'unknown'
- else:
+ if player_url is None:
+ player_version = 'unknown'
+ player_desc = 'unknown'
+ else:
+ if player_url.endswith('swf'):
player_version = self._search_regex(
- r'-(.+)\.swf$', player_url,
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
u'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- r'html5player-(.+?)\.js', video_webpage,
- 'html5 player', fatal=False)
- player_desc = u'html5 player %s' % player_version
+ player_desc = 'flash player %s' % player_version
+ else:
+ player_version = self._search_regex(
+ r'html5player-(.+?)\.js', video_webpage,
+ 'html5 player', fatal=False)
+ player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
- if not age_gate:
- jsplayer_url_json = self._search_regex(
- r'"assets":.+?"js":\s*("[^"]+")',
- video_webpage, u'JS player URL')
- player_url = json.loads(jsplayer_url_json)
-
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature
--- /dev/null
+from __future__ import unicode_literals
+
+import collections
+import io
+import zlib
+
+from .utils import (
+ compat_str,
+ ExtractorError,
+ struct_unpack,
+)
+
+
+def _extract_tags(file_contents):
+    """Yield (tag_code, tag_body) for every tag of an SWF file.
+
+    file_contents is the complete file as bytes. Both zlib-compressed
+    ('CWS') and uncompressed ('FWS') files are accepted.
+
+    Raises ExtractorError if the signature is not '?WS', and
+    NotImplementedError for any other compression marker.
+    """
+    signature = file_contents[:3]
+    if signature[1:3] != b'WS':
+        raise ExtractorError(
+            'Not an SWF file; header is %r' % signature)
+    compression = signature[:1]
+    if compression == b'C':
+        # Everything after the 8-byte header is zlib-compressed
+        content = zlib.decompress(file_contents[8:])
+    elif compression == b'F':
+        # Uncompressed file: the body follows the 8-byte header as-is
+        content = file_contents[8:]
+    else:
+        raise NotImplementedError(
+            'Unsupported compression format %r' %
+            compression)
+
+    # Determine number of bits in framesize rectangle
+    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
+    framesize_len = (5 + 4 * framesize_nbits + 7) // 8
+
+    # Skip the frame size plus two 2-byte fields that follow it
+    pos = framesize_len + 2 + 2
+    while pos < len(content):
+        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
+        pos += 2
+        # Upper 10 bits: tag code; lower 6 bits: short length
+        tag_code = header16 >> 6
+        tag_len = header16 & 0x3f
+        if tag_len == 0x3f:
+            # Long tag: the real length follows as a 32-bit integer
+            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
+            pos += 4
+        assert pos + tag_len <= len(content), \
+            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
+                % (tag_code, pos, tag_len, len(content)))
+        yield (tag_code, content[pos:pos + tag_len])
+        pos += tag_len
+
+
+class _AVMClass_Object(object):
+    """An instance of an AVM class; it only records its class."""
+
+    def __init__(self, avm_class):
+        self.avm_class = avm_class
+
+    def __repr__(self):
+        class_name = self.avm_class.name
+        identity = id(self)
+        return '%s#%x' % (class_name, identity)
+
+
+class _ScopeDict(dict):
+    """A dict that remembers the AVM class it belongs to."""
+
+    def __init__(self, avm_class):
+        dict.__init__(self)
+        self.avm_class = avm_class
+
+    def __repr__(self):
+        contents = dict.__repr__(self)
+        return '%s__Scope(%s)' % (self.avm_class.name, contents)
+
+
+class _AVMClass(object):
+    """Description of one class found in the bytecode."""
+
+    def __init__(self, name_idx, name):
+        self.name = name
+        self.name_idx = name_idx
+        # Two-way mapping filled by register_methods()
+        self.method_names = {}
+        self.method_idxs = {}
+        # Filled by users of this class
+        self.methods = {}
+        self.method_pyfunctions = {}
+
+        self.variables = _ScopeDict(self)
+
+    def __repr__(self):
+        return '_AVMClass(%s)' % (self.name)
+
+    def make_object(self):
+        """Return a new object of this class."""
+        return _AVMClass_Object(self)
+
+    def register_methods(self, methods):
+        """Record every key -> value pair of methods, and its inverse."""
+        for key, value in list(methods.items()):
+            self.method_names[key] = value
+            self.method_idxs[value] = key
+
+
+class _Multiname(object):
+    """Placeholder for a multiname of which only the kind is kept."""
+
+    def __init__(self, kind):
+        self.kind = kind
+
+    def __repr__(self):
+        kind = self.kind
+        return '[MULTINAME kind: 0x%x]' % kind
+
+
+def _read_int(reader):
+    """Read a variable-length integer of at most 5 bytes from reader.
+
+    Each byte contributes its low 7 bits, least significant group first;
+    a clear high bit marks the last byte.
+    """
+    value = 0
+    for shift in range(0, 35, 7):
+        chunk = reader.read(1)
+        assert len(chunk) == 1
+        byte = ord(chunk)
+        value |= (byte & 0x7f) << shift
+        if not byte & 0x80:
+            break
+    return value
+
+
+def _u30(reader):
+    """Read a variable-length unsigned integer that must fit in 30 bits.
+
+    Raises AssertionError if any bit above bit 29 is set.
+    """
+    res = _read_int(reader)
+    # All 30 value bits (0-29) are legal; only higher bits are rejected
+    assert res >> 30 == 0
+    return res
+
+
+# A u32 uses the same encoding without the 30-bit restriction
+_u32 = _read_int
+
+
+def _s32(reader):
+    """Read a variable-length integer and return it as a signed 32-bit value."""
+    # Five 7-bit groups can carry up to 35 bits; keep only the low 32 so
+    # that stray high bits cannot corrupt the sign conversion below.
+    v = _read_int(reader) & 0xffffffff
+    if v & 0x80000000:
+        # Two's complement: map [2**31, 2**32) onto [-2**31, 0)
+        v -= 0x100000000
+    return v
+
+
+def _s24(reader):
+    """Read a signed 24-bit little-endian integer from reader."""
+    raw = reader.read(3)
+    assert len(raw) == 3
+    low, mid, high = bytearray(raw)
+    value = low | (mid << 8) | (high << 16)
+    if high >= 0x80:
+        # Sign bit set: shift into the negative range
+        value -= 1 << 24
+    return value
+
+
+def _read_string(reader):
+    """Read a u30 byte length followed by that many UTF-8 encoded bytes."""
+    length = _u30(reader)
+    raw = reader.read(length)
+    if len(raw) != length:
+        raise AssertionError()
+    text = raw.decode('utf-8')
+    return text
+
+
+def _read_bytes(count, reader):
+    """Read exactly count bytes from reader.
+
+    Raises AssertionError if count is negative or fewer bytes are left.
+    """
+    assert count >= 0
+    data = reader.read(count)
+    got = len(data)
+    assert got == count
+    return data
+
+
+def _read_byte(reader):
+    """Read a single byte from reader and return it as an unsigned integer."""
+    (value,) = struct_unpack('<B', _read_bytes(1, reader=reader))
+    return value
+
+
+# Parser and interpreter for the ActionScript bytecode embedded in an SWF
+# file. __init__ parses the bytecode block into classes and method bodies;
+# extract_class() and extract_function() expose them to Python callers.
+class SWFInterpreter(object):
+ # file_contents: the complete SWF file as bytes.
+ def __init__(self, file_contents):
+ # Use the first tag with code 82; next() raises StopIteration if the
+ # file contains no such tag.
+ code_tag = next(tag
+ for tag_code, tag in _extract_tags(file_contents)
+ if tag_code == 82)
+ # Skip the first 4 bytes of the tag and the NUL-terminated string that
+ # follows them; the bytecode starts right after.
+ p = code_tag.index(b'\0', 4) + 1
+ code_reader = io.BytesIO(code_tag[p:])
+
+ # Parse ABC (AVM2 ByteCode)
+
+ # Define a couple of convenience helpers, all bound to code_reader
+ u30 = lambda *args: _u30(*args, reader=code_reader)
+ s32 = lambda *args: _s32(*args, reader=code_reader)
+ u32 = lambda *args: _u32(*args, reader=code_reader)
+ read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
+ read_byte = lambda *args: _read_byte(*args, reader=code_reader)
+
+ # minor_version + major_version
+ read_bytes(2 + 2)
+
+ # Constant pool
+ # Every pool is read starting at index 1, i.e. count - 1 entries are
+ # stored. Integers, unsigned integers and doubles are skipped; only
+ # strings and multinames are kept.
+ int_count = u30()
+ for _c in range(1, int_count):
+ s32()
+ uint_count = u30()
+ for _c in range(1, uint_count):
+ u32()
+ double_count = u30()
+ # 8 bytes per stored double
+ read_bytes(max(0, (double_count - 1)) * 8)
+ string_count = u30()
+ # Index 0 is filled with a placeholder empty string
+ self.constant_strings = ['']
+ for _c in range(1, string_count):
+ s = _read_string(code_reader)
+ self.constant_strings.append(s)
+ namespace_count = u30()
+ for _c in range(1, namespace_count):
+ read_bytes(1) # kind
+ u30() # name
+ ns_set_count = u30()
+ for _c in range(1, ns_set_count):
+ count = u30()
+ for _c2 in range(count):
+ u30()
+ multiname_count = u30()
+ # Number of u30 operands that follow each multiname kind
+ MULTINAME_SIZES = {
+ 0x07: 2, # QName
+ 0x0d: 2, # QNameA
+ 0x0f: 1, # RTQName
+ 0x10: 1, # RTQNameA
+ 0x11: 0, # RTQNameL
+ 0x12: 0, # RTQNameLA
+ 0x09: 2, # Multiname
+ 0x0e: 2, # MultinameA
+ 0x1b: 1, # MultinameL
+ 0x1c: 1, # MultinameLA
+ }
+ # Index 0 is filled with a placeholder empty string
+ self.multinames = ['']
+ for _c in range(1, multiname_count):
+ kind = u30()
+ assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
+ if kind == 0x07:
+ # Only QNames are resolved to their name string
+ u30() # namespace_idx
+ name_idx = u30()
+ self.multinames.append(self.constant_strings[name_idx])
+ else:
+ # Every other kind is kept as an opaque placeholder and its
+ # operands are skipped
+ self.multinames.append(_Multiname(kind))
+ for _c2 in range(MULTINAME_SIZES[kind]):
+ u30()
+
+ # Methods
+ method_count = u30()
+ MethodInfo = collections.namedtuple(
+ 'MethodInfo',
+ ['NEED_ARGUMENTS', 'NEED_REST'])
+ # NOTE(review): method_infos is filled below but not read anywhere else
+ # in this class.
+ method_infos = []
+ for method_id in range(method_count):
+ param_count = u30()
+ u30() # return type
+ for _ in range(param_count):
+ u30() # param type
+ u30() # name index (always 0 for youtube)
+ flags = read_byte()
+ if flags & 0x08 != 0:
+ # Options present
+ option_count = u30()
+ for c in range(option_count):
+ u30() # val
+ read_bytes(1) # kind
+ if flags & 0x80 != 0:
+ # Param names present
+ for _ in range(param_count):
+ u30() # param name
+ mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
+ method_infos.append(mi)
+
+ # Metadata
+ metadata_count = u30()
+ for _c in range(metadata_count):
+ u30() # name
+ item_count = u30()
+ for _c2 in range(item_count):
+ u30() # key
+ u30() # value
+
+ # Consume one trait entry from code_reader and return a dict describing
+ # the method it declares (empty for slots, constants and classes).
+ def parse_traits_info():
+ trait_name_idx = u30()
+ kind_full = read_byte()
+ # Low nibble: trait kind; high nibble: attribute flags
+ kind = kind_full & 0x0f
+ attrs = kind_full >> 4
+ methods = {}
+ if kind in [0x00, 0x06]: # Slot or Const
+ u30() # Slot id
+ u30() # type_name_idx
+ vindex = u30()
+ if vindex != 0:
+ read_byte() # vkind
+ elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
+ u30() # disp_id
+ method_idx = u30()
+ # name -> method index
+ methods[self.multinames[trait_name_idx]] = method_idx
+ elif kind == 0x04: # Class
+ u30() # slot_id
+ u30() # classi
+ elif kind == 0x05: # Function
+ u30() # slot_id
+ function_idx = u30()
+ # NOTE(review): this stores index -> name, the inverse of the
+ # Method/Getter/Setter branch above - confirm this is intended.
+ methods[function_idx] = self.multinames[trait_name_idx]
+ else:
+ raise ExtractorError('Unsupported trait kind %d' % kind)
+
+ if attrs & 0x4 != 0: # Metadata present
+ metadata_count = u30()
+ for _c3 in range(metadata_count):
+ u30() # metadata index
+
+ return methods
+
+ # Classes
+ # First pass: one record per class holding its name, flags, interfaces,
+ # iinit and traits.
+ class_count = u30()
+ classes = []
+ for class_id in range(class_count):
+ name_idx = u30()
+
+ cname = self.multinames[name_idx]
+ avm_class = _AVMClass(name_idx, cname)
+ classes.append(avm_class)
+
+ u30() # super_name idx
+ flags = read_byte()
+ if flags & 0x08 != 0: # Protected namespace is present
+ u30() # protected_ns_idx
+ intrf_count = u30()
+ for _c2 in range(intrf_count):
+ u30()
+ u30() # iinit
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ trait_methods = parse_traits_info()
+ avm_class.register_methods(trait_methods)
+
+ assert len(classes) == class_count
+ # If several classes share a name, the last one wins
+ self._classes_by_name = dict((c.name, c) for c in classes)
+
+ # Second pass, in the same class order: cinit and a further list of
+ # traits, registered on the same class object as the first pass.
+ for avm_class in classes:
+ u30() # cinit
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ trait_methods = parse_traits_info()
+ avm_class.register_methods(trait_methods)
+
+ # Scripts
+ # Parsed only to advance the reader; the results are discarded.
+ script_count = u30()
+ for _c in range(script_count):
+ u30() # init
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ parse_traits_info()
+
+ # Method bodies
+ method_body_count = u30()
+ Method = collections.namedtuple('Method', ['code', 'local_count'])
+ for _c in range(method_body_count):
+ method_idx = u30()
+ u30() # max_stack
+ local_count = u30()
+ u30() # init_scope_depth
+ u30() # max_scope_depth
+ code_length = u30()
+ code = read_bytes(code_length)
+ # Attach the body, under its registered name, to every class that
+ # registered this method index
+ for avm_class in classes:
+ if method_idx in avm_class.method_idxs:
+ m = Method(code, local_count)
+ avm_class.methods[avm_class.method_idxs[method_idx]] = m
+ exception_count = u30()
+ for _c2 in range(exception_count):
+ u30() # from
+ u30() # to
+ u30() # target
+ u30() # exc_type
+ u30() # var_name
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ parse_traits_info()
+
+ # Sanity check: the whole tag must have been consumed
+ assert p + code_reader.tell() == len(code_tag)
+
+ # Return the _AVMClass called class_name; raises ExtractorError if the
+ # file defines no such class.
+ def extract_class(self, class_name):
+ try:
+ return self._classes_by_name[class_name]
+ except KeyError:
+ raise ExtractorError('Class %r not found' % class_name)
+
+ # Return a Python callable implementing method func_name of avm_class.
+ # The callable takes ONE argument, the list of call arguments. If func_name
+ # is not cached and is the name of a class, a new object of that class is
+ # returned instead of a callable. Raises ExtractorError if the method
+ # cannot be found.
+ def extract_function(self, avm_class, func_name):
+ # Cached from an earlier call
+ if func_name in avm_class.method_pyfunctions:
+ return avm_class.method_pyfunctions[func_name]
+ if func_name in self._classes_by_name:
+ return self._classes_by_name[func_name].make_object()
+ if func_name not in avm_class.methods:
+ raise ExtractorError('Cannot find function %s.%s' % (
+ avm_class.name, func_name))
+ m = avm_class.methods[func_name]
+
+ # Interpret the method's bytecode from the start on every call.
+ def resfunc(args):
+ # Helper functions
+ coder = io.BytesIO(m.code)
+ s24 = lambda: _s24(coder)
+ u30 = lambda: _u30(coder)
+
+ # Register 0 holds the class's variable scope, followed by the call
+ # arguments and then local_count empty slots
+ registers = [avm_class.variables] + list(args) + [None] * m.local_count
+ stack = []
+ # Scope chain, outermost first: all classes by name, then this
+ # class's variables
+ scopes = collections.deque([
+ self._classes_by_name, avm_class.variables])
+ # The loop only ends through returnvalue (opcode 72) or an exception
+ while True:
+ opcode = _read_byte(coder)
+ if opcode == 17: # iftrue
+ # The jump offset is relative to the position after the operand
+ offset = s24()
+ value = stack.pop()
+ if value:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 18: # iffalse
+ offset = s24()
+ value = stack.pop()
+ if not value:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 36: # pushbyte
+ v = _read_byte(coder)
+ stack.append(v)
+ elif opcode == 42: # dup
+ value = stack[-1]
+ stack.append(value)
+ elif opcode == 44: # pushstring
+ idx = u30()
+ stack.append(self.constant_strings[idx])
+ elif opcode == 48: # pushscope
+ new_scope = stack.pop()
+ scopes.append(new_scope)
+ elif opcode == 66: # construct
+ arg_count = u30()
+ # Rebinding args is harmless: registers were already built
+ # from the original value. The arguments are popped but not
+ # used; no constructor code is run.
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ res = obj.avm_class.make_object()
+ stack.append(res)
+ elif opcode == 70: # callproperty
+ index = u30()
+ mname = self.multinames[index]
+ arg_count = u30()
+ # Arguments are popped last-first, hence the reversal
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+
+ # Dispatch on the type of the receiver; every handled case
+ # pushes its result and continues with the next opcode.
+ if isinstance(obj, _AVMClass_Object):
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ stack.append(res)
+ continue
+ elif isinstance(obj, _ScopeDict):
+ if mname in obj.avm_class.method_names:
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ else:
+ # Not a method: plain lookup in the scope
+ res = obj[mname]
+ stack.append(res)
+ continue
+ elif isinstance(obj, compat_str):
+ if mname == 'split':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ if args[0] == '':
+ # Python's str.split rejects an empty separator;
+ # split into single characters instead
+ res = list(obj)
+ else:
+ res = obj.split(args[0])
+ stack.append(res)
+ continue
+ elif isinstance(obj, list):
+ if mname == 'slice':
+ assert len(args) == 1
+ assert isinstance(args[0], int)
+ res = obj[args[0]:]
+ stack.append(res)
+ continue
+ elif mname == 'join':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ res = args[0].join(obj)
+ stack.append(res)
+ continue
+ # Reached for every receiver/property combination not handled
+ # above
+ raise NotImplementedError(
+ 'Unsupported property %r on %r'
+ % (mname, obj))
+ elif opcode == 72: # returnvalue
+ res = stack.pop()
+ return res
+ elif opcode == 74: # constructproperty
+ index = u30()
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+
+ mname = self.multinames[index]
+ assert isinstance(obj, _AVMClass)
+
+ # We do not actually call the constructor for now;
+ # we just pretend it does nothing
+ stack.append(obj.make_object())
+ elif opcode == 79: # callpropvoid
+ index = u30()
+ mname = self.multinames[index]
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ if mname == 'reverse':
+ assert isinstance(obj, list)
+ # In place; nothing is pushed
+ obj.reverse()
+ else:
+ raise NotImplementedError(
+ 'Unsupported (void) property %r on %r'
+ % (mname, obj))
+ elif opcode == 86: # newarray
+ arg_count = u30()
+ arr = []
+ for i in range(arg_count):
+ arr.append(stack.pop())
+ # Elements were popped last-first; restore their order
+ arr = arr[::-1]
+ stack.append(arr)
+ elif opcode == 93: # findpropstrict
+ index = u30()
+ mname = self.multinames[index]
+ # Search from the innermost scope outwards
+ for s in reversed(scopes):
+ if mname in s:
+ res = s
+ break
+ else:
+ res = scopes[0]
+ # Pushes the value found, not the scope holding it; raises
+ # KeyError if the fallback scope does not define mname
+ stack.append(res[mname])
+ elif opcode == 94: # findproperty
+ index = u30()
+ mname = self.multinames[index]
+ for s in reversed(scopes):
+ if mname in s:
+ res = s
+ break
+ else:
+ res = avm_class.variables
+ # Pushes the scope itself
+ stack.append(res)
+ elif opcode == 96: # getlex
+ index = u30()
+ mname = self.multinames[index]
+ for s in reversed(scopes):
+ if mname in s:
+ scope = s
+ break
+ else:
+ scope = avm_class.variables
+ # I cannot find where static variables are initialized
+ # so let's just return None
+ res = scope.get(mname)
+ stack.append(res)
+ elif opcode == 97: # setproperty
+ index = u30()
+ value = stack.pop()
+ idx = self.multinames[index]
+ # An unresolved multiname means the real key is on the stack
+ if isinstance(idx, _Multiname):
+ idx = stack.pop()
+ obj = stack.pop()
+ obj[idx] = value
+ elif opcode == 98: # getlocal
+ index = u30()
+ stack.append(registers[index])
+ elif opcode == 99: # setlocal
+ index = u30()
+ value = stack.pop()
+ registers[index] = value
+ elif opcode == 102: # getproperty
+ index = u30()
+ pname = self.multinames[index]
+ if pname == 'length':
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ stack.append(len(obj))
+ else: # Assume attribute access
+ # Only integer indexing into a list is supported here
+ idx = stack.pop()
+ assert isinstance(idx, int)
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ stack.append(obj[idx])
+ elif opcode == 115: # convert_
+ # Converts the top of the stack with int()
+ value = stack.pop()
+ intvalue = int(value)
+ stack.append(intvalue)
+ elif opcode == 128: # coerce
+ # The operand is read and ignored; the stack is left untouched
+ u30()
+ elif opcode == 133: # coerce_s
+ assert isinstance(stack[-1], (type(None), compat_str))
+ elif opcode == 160: # add
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 + value2
+ stack.append(res)
+ elif opcode == 161: # subtract
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 - value2
+ stack.append(res)
+ elif opcode == 164: # modulo
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 % value2
+ stack.append(res)
+ elif opcode == 175: # greaterequals
+ value2 = stack.pop()
+ value1 = stack.pop()
+ result = value1 >= value2
+ stack.append(result)
+ elif opcode == 208: # getlocal_0
+ stack.append(registers[0])
+ elif opcode == 209: # getlocal_1
+ stack.append(registers[1])
+ elif opcode == 210: # getlocal_2
+ stack.append(registers[2])
+ elif opcode == 211: # getlocal_3
+ stack.append(registers[3])
+ elif opcode == 212: # setlocal_0
+ registers[0] = stack.pop()
+ elif opcode == 213: # setlocal_1
+ registers[1] = stack.pop()
+ elif opcode == 214: # setlocal_2
+ registers[2] = stack.pop()
+ elif opcode == 215: # setlocal_3
+ registers[3] = stack.pop()
+ else:
+ raise NotImplementedError(
+ 'Unsupported opcode %d' % opcode)
+
+ # Cache the callable so later lookups return the same object
+ avm_class.method_pyfunctions[func_name] = resfunc
+ return resfunc
+
return u'%.2f%s' % (converted, suffix)
-def str_to_int(int_str):
- int_str = re.sub(r'[,\.]', u'', int_str)
- return int(int_str)
-
-
def get_term_width():
columns = os.environ.get('COLUMNS', None)
if columns:
return "HEAD"
-def int_or_none(v, scale=1, default=None, get_attr=None):
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
- return default if v is None else (int(v) // scale)
+ return default if v is None else (int(v) * invscale // scale)
+
+
+def str_to_int(int_str):
+    """Convert a string that may contain ',' or '.' separators to an int.
+
+    None is passed through unchanged.
+    """
+    if int_str is not None:
+        digits = re.sub(r'[,\.]', u'', int_str)
+        return int(digits)
+    return None
-def float_or_none(v, scale=1, default=None):
- return default if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, invscale=1, default=None):
+    """Return float(v) * invscale / scale, or default if v is None."""
+    if v is None:
+        return default
+    return float(v) * invscale / scale
def parse_duration(s):
-__version__ = '2014.07.15'
+__version__ = '2014.07.20.2'