/youtube-dl .
--no-cache-dir Disable filesystem caching
--bidi-workaround Work around terminals that lack bidirectional
- text support. Requires fribidi executable in PATH
+ text support. Requires bidiv or fribidi
+ executable in PATH
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
- SmotriUserIE
+ SmotriUserIE,
+ IviCompilationIE
)
self.assertEqual(result['title'], u'Building Dynamic Websites')
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
self.assertEqual(len(result['entries']), 10)
+
+ def test_ivi_compilation(self):
+     # Whole-compilation URL: the expected playlist id is the URL slug.
+     extractor = IviCompilationIE(FakeYDL())
+     playlist = extractor.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
+     self.assertIsPlaylist(playlist)
+     self.assertEqual(playlist['id'], u'dezhurnyi_angel')
+     self.assertEqual(playlist['title'], u'Дежурный ангел (2010 - 2012)')
+     # Lower bound rather than an exact entry count.
+     self.assertTrue(len(playlist['entries']) >= 36)
+
+ def test_ivi_compilation_season(self):
+     # Season URL: the expected playlist id is "<slug>/season<N>".
+     extractor = IviCompilationIE(FakeYDL())
+     playlist = extractor.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
+     self.assertIsPlaylist(playlist)
+     self.assertEqual(playlist['id'], u'dezhurnyi_angel/season2')
+     self.assertEqual(playlist['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
+     # Lower bound rather than an exact entry count.
+     self.assertTrue(len(playlist['entries']) >= 20)
if __name__ == '__main__':
width_args = []
else:
width_args = ['-w', str(width)]
- self._fribidi = subprocess.Popen(
- ['fribidi', '-c', 'UTF-8'] + width_args,
+ sp_kwargs = dict(
stdin=subprocess.PIPE,
stdout=slave,
stderr=self._err_file)
- self._fribidi_channel = os.fdopen(master, 'rb')
+ try:
+ self._output_process = subprocess.Popen(
+ ['bidiv'] + width_args, **sp_kwargs
+ )
+ except OSError:
+ self._output_process = subprocess.Popen(
+ ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
pp.set_downloader(self)
def _bidi_workaround(self, message):
- if not hasattr(self, '_fribidi_channel'):
+ if not hasattr(self, '_output_channel'):
return message
+ assert hasattr(self, '_output_process')
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
- self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
- self._fribidi.stdin.flush()
- res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+ self._output_process.stdin.write((message + u'\n').encode('utf-8'))
+ self._output_process.stdin.flush()
+ res = u''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
compat_print,
DateRange,
decodeOption,
- determine_ext,
get_term_width,
DownloadError,
get_cachedir,
type=float, default=None, help=optparse.SUPPRESS_HELP)
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
- help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
+ help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
selection.add_option(
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)]
- all_urls = [url for url in all_urls if url not in matchedUrls]
for mu in matchedUrls:
compat_print(u' ' + mu)
sys.exit(0)
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
return decrypted_data
+def aes_cbc_decrypt(data, key, iv):
+    """
+    Decrypt with aes in CBC mode
+
+    Each cipher block is passed through aes_decrypt and XORed with the
+    previous cipher block (the IV for the first block). A short final
+    block is zero-padded before decryption and the output is trimmed
+    back to len(data).
+
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {int[]} iv          16-Byte IV
+    @returns {int[]}           decrypted data
+    """
+    round_keys = key_expansion(key)
+    total = len(data)
+
+    plain = []
+    chain = iv
+    for offset in range(0, total, BLOCK_SIZE_BYTES):
+        cipher_block = data[offset:offset + BLOCK_SIZE_BYTES]
+        cipher_block += [0] * (BLOCK_SIZE_BYTES - len(cipher_block))
+
+        plain += xor(aes_decrypt(cipher_block, round_keys), chain)
+        # the (padded) cipher block feeds the XOR of the next block
+        chain = cipher_block
+
+    return plain[:total]
+
def key_expansion(data):
"""
Generate key schedule
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
+
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+
+ return data
+
+def aes_decrypt(data, expanded_key):
+    """
+    Decrypt one block with aes
+
+    Applies the round keys from the last one down to the first. The
+    inverse MixColumns step is skipped on the first pass, mirroring
+    aes_encrypt, which omits MixColumns in its final round.
+
+    @param {int[]} data          16-Byte cipher
+    @param {int[]} expanded_key  176/208/240-Byte expanded key
+    @returns {int[]}             16-Byte state
+    """
+    size = BLOCK_SIZE_BYTES
+    last_round = len(expanded_key) // size - 1
+
+    for round_no in reversed(range(1, last_round + 1)):
+        round_key = expanded_key[round_no * size:(round_no + 1) * size]
+        data = xor(data, round_key)
+        if round_no < last_round:
+            data = mix_columns_inv(data)
+        data = sub_bytes_inv(shift_rows_inv(data))
+    # the initial round key is applied last
+    data = xor(data, expanded_key[:size])
     return data
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
-MIX_COLUMN_MATRIX = ((2,3,1,1),
- (1,2,3,1),
- (1,1,2,3),
- (3,1,1,2))
+SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
+MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
+ (0x1,0x2,0x3,0x1),
+ (0x1,0x1,0x2,0x3),
+ (0x3,0x1,0x1,0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
+ (0x9,0xE,0xB,0xD),
+ (0xD,0x9,0xE,0xB),
+ (0xB,0xD,0x9,0xE))
+RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
+RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data):
return [SBOX[x] for x in data]
+def sub_bytes_inv(data):
+    """Return a new list with every value of data looked up in SBOX_INV."""
+    return [SBOX_INV[value] for value in data]
+
def rotate(data):
return data[1:] + [data[0]]
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
-def mix_column(data):
+def rijndael_mul(a, b):
+    """
+    Multiply a and b using the Rijndael log/exp lookup tables
+
+    The logarithms of both factors are added modulo 0xFF and the sum is
+    mapped back through RIJNDAEL_EXP_TABLE.
+
+    @param {int} a  first factor (table index)
+    @param {int} b  second factor (table index)
+    @returns {int}  product, 0 if either factor is 0
+    """
+    if 0 in (a, b):
+        # a zero factor short-circuits without touching the tables
+        return 0
+    log_sum = RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]
+    return RIJNDAEL_EXP_TABLE[log_sum % 0xFF]
+
+def mix_column(data, matrix):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
- addend = data[column]
- if MIX_COLUMN_MATRIX[row][column] in (2,3):
- addend <<= 1
- if addend > 0xff:
- addend &= 0xff
- addend ^= 0x1b
- if MIX_COLUMN_MATRIX[row][column] == 3:
- addend ^= data[column]
- mixed ^= addend & 0xff
+ # xor is (+) and (-)
+ mixed ^= rijndael_mul(data[column], matrix[row][column])
data_mixed.append(mixed)
return data_mixed
-def mix_columns(data):
+def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
- data_mixed += mix_column(column)
+ data_mixed += mix_column(column, matrix)
return data_mixed
+def mix_columns_inv(data):
+    """Run mix_columns with MIX_COLUMN_MATRIX_INV instead of the default matrix."""
+    return mix_columns(data, matrix=MIX_COLUMN_MATRIX_INV)
+
def shift_rows(data):
data_shifted = []
for column in range(4):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
+def shift_rows_inv(data):
+    """
+    Inverse of shift_rows
+
+    Output position (column, row) is filled from input index
+    ((column - row) mod 4) * 4 + row.
+
+    @param {int[]} data  16 values, indexed as column * 4 + row
+    @returns {int[]}     16 values in the shifted order
+    """
+    return [data[((column - row) % 4) * 4 + row]
+            for column in range(4)
+            for row in range(4)]
+
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
+from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
+from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
from .infoq import InfoQIE
from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
--- /dev/null
+#coding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ HEADRequest,
+)
+
+
+class AparatIE(InfoExtractor):
+    """Information extractor for aparat.com video and embed URLs."""
+
+    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+
+    _TEST = {
+        u'url': u'http://www.aparat.com/v/wP8On',
+        u'file': u'wP8On.mp4',
+        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
+        u'info_dict': {
+            u"title": u"تیم گلکسی 11 - زومیت",
+        },
+        #u'skip': u'Extremely unreliable',
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video addressed by url.
+
+        Downloads the embed frame, collects every fileList[...] URL found
+        in it and keeps the first one for which the HEAD probe returns a
+        truthy response. Raises ExtractorError if none does.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        # Note: There is an easier-to-parse configuration at
+        # http://www.aparat.com/video/video/config/videohash/%video_id
+        # but the URL in there does not work
+        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
+                     video_id + u'/vt/frame')
+        webpage = self._download_webpage(embed_url, video_id)
+
+        candidates = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+        for index, video_url in enumerate(candidates):
+            # errnote=False: presumably a failed probe yields a falsy
+            # result instead of raising, so the next candidate is tried
+            probe = self._request_webpage(
+                HEADRequest(video_url), video_id,
+                note=u'Testing video URL %d' % index, errnote=False)
+            if probe:
+                break
+        else:
+            # also reached when the page lists no URLs at all
+            raise ExtractorError(u'No working video URLs found')
+
+        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+        thumbnail = self._search_regex(
+            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+        }
+        return info
class BlinkxIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
+ _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_IE_NAME = u'blinkx'
_TEST = {
})
elif m['type'] == 'original':
duration = m['d']
+ elif m['type'] == 'youtube':
+ yt_id = m['link']
+ self.to_screen(u'Youtube video detected: %s' % yt_id)
+ return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
info = None
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
+
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
- info = {
+ return {
'id': title,
'url': url,
'uploader': None,
'ext': ext,
'urlhandle': urlh
}
- if info is None: # Regular URL
- try:
- json_code_bytes = urlh.read()
- json_code = json_code_bytes.decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
-
- try:
- json_data = json.loads(json_code)
- if 'Post' in json_data:
- data = json_data['Post']
- else:
- data = json_data
-
- upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
- if 'additionalMedia' in data:
- formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
- best_format = formats[-1]
- video_url = best_format['url']
- else:
- video_url = data['media']['url']
- umobj = re.match(self._URL_EXT, video_url)
- if umobj is None:
- raise ValueError('Can not determine filename extension')
- ext = umobj.group(1)
-
- info = {
- 'id': compat_str(data['item_id']),
- 'url': video_url,
- 'uploader': data['display_name'],
- 'upload_date': upload_date,
- 'title': data['title'],
- 'ext': ext,
- 'format': data['media']['mimeType'],
- 'thumbnail': data['thumbnailUrl'],
- 'description': data['description'],
- 'player_url': data['embedUrl'],
- 'user_agent': 'iTunes/10.6.1',
- }
- except (ValueError,KeyError) as err:
- raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
-
- return [info]
+
+ try:
+ json_code_bytes = urlh.read()
+ json_code = json_code_bytes.decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
+
+ try:
+ json_data = json.loads(json_code)
+ if 'Post' in json_data:
+ data = json_data['Post']
+ else:
+ data = json_data
+
+ upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
+ if 'additionalMedia' in data:
+ formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
+ best_format = formats[-1]
+ video_url = best_format['url']
+ else:
+ video_url = data['media']['url']
+ umobj = re.match(self._URL_EXT, video_url)
+ if umobj is None:
+ raise ValueError('Can not determine filename extension')
+ ext = umobj.group(1)
+
+ return {
+ 'id': compat_str(data['item_id']),
+ 'url': video_url,
+ 'uploader': data['display_name'],
+ 'upload_date': upload_date,
+ 'title': data['title'],
+ 'ext': ext,
+ 'format': data['media']['mimeType'],
+ 'thumbnail': data['thumbnailUrl'],
+ 'description': data['description'],
+ 'player_url': data['embedUrl'],
+ 'user_agent': 'iTunes/10.6.1',
+ }
+ except (ValueError, KeyError) as err:
+ raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
class BlipTVUserIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
- u'md5': u'8eccab865181d29ec2958f32a6a754f5',
+ u'md5': u'5423e113865d26e40624dce2e4b45d95',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ if errnote is False:
+ return False
if errnote is None:
errnote = u'Unable to download webpage'
errmsg = u'%s: %s' % (errnote, compat_str(err))
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None, video_id=None):
+ @staticmethod
+ def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
if video_id is not None:
video_info['id'] = video_id
return video_info
- def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
--- /dev/null
+# encoding: utf-8
+import re, base64, zlib
+from hashlib import sha1
+from math import pow, sqrt, floor
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_parse,
+ compat_urllib_request,
+ bytes_to_intlist,
+ intlist_to_bytes,
+ unified_strdate,
+ clean_html,
+)
+from ..aes import (
+ aes_cbc_decrypt,
+ inc,
+)
+
+class CrunchyrollIE(InfoExtractor):
+    """Information extractor for crunchyroll.com episode pages.
+
+    Builds one FLV format entry per quality advertised on the page and
+    converts every decryptable subtitle track to SRT text.
+    """
+
+    _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _TESTS = [{
+        u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+        u'file': u'645513.flv',
+        #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
+        u'info_dict': {
+            u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+            u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
+            u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+            u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
+            u'upload_date': u'20131013',
+        },
+        u'params': {
+            # rtmp
+            u'skip_download': True,
+        },
+    }]
+
+    # quality label from the page -> (video_encode_quality, video_format)
+    # values sent with the stream info request
+    _FORMAT_IDS = {
+        u'360': (u'60', u'106'),
+        u'480': (u'61', u'106'),
+        u'720': (u'62', u'106'),
+        u'1080': (u'80', u'108'),
+    }
+
+    def _decrypt_subtitles(self, data, iv, id):
+        """Decrypt and decompress one subtitle payload.
+
+        @param data  encrypted subtitle bytes
+        @param iv    initialisation vector bytes
+        @param id    subtitle script id (int or numeric string); the AES
+                     key is derived from it
+        @returns     the zlib-decompressed plaintext
+        """
+        data = bytes_to_intlist(data)
+        iv = bytes_to_intlist(iv)
+        script_id = int(id)
+
+        def obfuscate_key_aux(count, modulo, start):
+            # Fibonacci-style sequence seeded with `start`, reduced to
+            # the range 33 .. modulo + 32
+            output = list(start)
+            for _ in range(count):
+                output.append(output[-1] + output[-2])
+            # cut off start values
+            output = output[2:]
+            output = list(map(lambda x: x % modulo + 33, output))
+            return output
+
+        def obfuscate_key(key):
+            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
+            num2 = (num1 ^ key) << 5
+            num3 = key ^ num1
+            num4 = num3 ^ (num3 >> 3) ^ num2
+            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
+            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
+            # Extend 160 Bit hash to 256 Bit
+            return shaHash + [0] * 12
+
+        key = obfuscate_key(script_id)
+        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
+        return zlib.decompress(decrypted_data)
+
+    def _convert_subtitles_to_srt(self, subtitles):
+        """Convert the <event .../> elements of a subtitle script to SRT.
+
+        Events whose text is empty after clean_html are dropped and do
+        not consume a cue number.
+
+        @param subtitles  decrypted subtitle script (text)
+        @returns          SRT text, u'' when no event qualifies
+        """
+        cues = []
+        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
+            start = start.replace(u'.', u',')
+            end = end.replace(u'.', u',')
+            text = clean_html(text)
+            text = text.replace(u'\\N', u'\n')
+            if not text:
+                continue
+            cues.append(u'%d\n%s --> %s\n%s\n\n' % (len(cues) + 1, start, end, text))
+        return u''.join(cues)
+
+    def _real_extract(self, url):
+        """Return the info dict (metadata, formats, subtitles) for url.
+
+        Raises ExtractorError carrying the page's trailer notice when the
+        page shows one instead of the episode.
+        """
+        mobj = re.match(self._VALID_URL, url)
+
+        webpage_url = u'http://www.' + mobj.group('url')
+        video_id = mobj.group(u'video_id')
+        webpage = self._download_webpage(webpage_url, video_id)
+        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
+        if note_m:
+            raise ExtractorError(note_m)
+
+        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
+        video_title = re.sub(r' {2,}', u' ', video_title)
+        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
+        if not video_description:
+            video_description = None
+        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
+        if video_upload_date:
+            video_upload_date = unified_strdate(video_upload_date)
+        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
+
+        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
+        playerdata_req = compat_urllib_request.Request(playerdata_url)
+        playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
+        playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
+        playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
+
+        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
+        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
+
+        formats = []
+        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+            if fmt not in self._FORMAT_IDS:
+                # The pattern accepts any 3-4 digit label; skip the ones
+                # the table cannot translate instead of failing the whole
+                # extraction with a KeyError.
+                self.to_screen(u'Skipping unknown format %sp' % fmt)
+                continue
+            stream_quality, stream_format = self._FORMAT_IDS[fmt]
+            video_format = fmt+u'p'
+            streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
+            # urlencode doesn't work!
+            streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
+            streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
+            streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
+            streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
+            video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
+            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
+            formats.append({
+                u'url': video_url,
+                u'play_path': video_play_path,
+                u'ext': 'flv',
+                u'format': video_format,
+                u'format_id': video_format,
+            })
+
+        subtitles = {}
+        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+            sub_page = self._download_webpage(
+                u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,
+                video_id, note=u'Downloading subtitles for '+sub_name)
+            sub_script_id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
+            iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
+            data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
+            if not sub_script_id or not iv or not data:
+                # incomplete subtitle page: nothing to decrypt
+                continue
+            sub_script_id = int(sub_script_id)
+            iv = base64.b64decode(iv)
+            data = base64.b64decode(data)
+
+            subtitle = self._decrypt_subtitles(data, iv, sub_script_id).decode(u'utf-8')
+            lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
+            if not lang_code:
+                continue
+            subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
+
+        return {
+            u'id': video_id,
+            u'title': video_title,
+            u'description': video_description,
+            u'thumbnail': video_thumbnail,
+            u'uploader': video_uploader,
+            u'upload_date': video_upload_date,
+            u'subtitles': subtitles,
+            u'formats': formats,
+        }
compat_urlparse,
ExtractorError,
+ HEADRequest,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
)
from .brightcove import BrightcoveIE
+from .ooyala import OoyalaIE
class GenericIE(InfoExtractor):
u'title': u'trailer',
u'upload_date': u'20100513',
}
- }
+ },
+ # ooyala video
+ {
+ u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
+ u'info_dict': {
+ u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ u'ext': u'mp4',
+ u'title': u'2cc213299525360.mov', #that's what we get
+ },
+ },
]
def report_download_webpage(self, video_id):
def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
- class HeadRequest(compat_urllib_request.Request):
- def get_method(self):
- return "HEAD"
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
"""
Subclass the HTTPRedirectHandler to make it use our
- HeadRequest also on the redirected URL
+ HEADRequest also on the redirected URL
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
if code in (301, 302, 303, 307):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
- return HeadRequest(newurl,
+ return HEADRequest(newurl,
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
- response = opener.open(HeadRequest(url))
+ response = opener.open(HEADRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
return response
self.to_screen(u'Brightcove video detected.')
return self.url_result(bc_url, 'Brightcove')
- # Look for embedded Vimeo player
+ # Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
if mobj:
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl, 'Vimeo')
+ # Look for embedded (swf embed) Vimeo player
+ mobj = re.search(
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'Vimeo')
+
# Look for embedded YouTube player
- matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
+ matches = re.findall(r'''(?x)
+ (?:<iframe[^>]+?src=|embedSWF\(\s*)
+ (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?:embed|v)/.+?)
+ \1''', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for Ooyala videos
+ mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+ if mobj is not None:
+ return OoyalaIE._build_url_result(mobj.group(1))
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
- _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+ _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- webpage = self._download_webpage(url,video_id)
+ webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
--- /dev/null
+# encoding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_request,
+ ExtractorError,
+)
+
+
+class IviIE(InfoExtractor):
+    """Extractor for single ivi.ru videos (movies and series episodes).
+
+    Media links, title, duration and previews are read from the
+    digitalaccess.ru JSON API; the description and the comment count are
+    scraped from the watch page itself.
+    """
+    IE_DESC = u'ivi.ru'
+    IE_NAME = u'ivi'
+    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+
+    _TESTS = [
+        # Single movie
+        {
+            u'url': u'http://www.ivi.ru/watch/53141',
+            u'file': u'53141.mp4',
+            u'md5': u'6ff5be2254e796ed346251d117196cf4',
+            u'info_dict': {
+                u'title': u'Иван Васильевич меняет профессию',
+                u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
+                u'duration': 5498,
+                u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
+            },
+            u'skip': u'Only works from Russia',
+        },
+        # Episode of a series
+        {
+            u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
+            u'file': u'74791.mp4',
+            u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
+            u'info_dict': {
+                u'title': u'Дежурный ангел - 1 серия',
+                u'duration': 2490,
+                u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
+            },
+            u'skip': u'Only works from Russia',
+        }
+    ]
+
+    # Sorted by quality
+    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
+
+    # Sorted by size
+    _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
+
+    def _extract_description(self, html):
+        """Return the content of the description <meta> tag, or None if absent."""
+        m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
+        return m.group('description') if m is not None else None
+
+    def _extract_comment_count(self, html):
+        """Return the number shown on the "view comments" button, or 0 if absent."""
+        # Backslashes are doubled because this literal cannot be a raw string
+        # on both Python 2 and 3 while still carrying the u prefix.
+        m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\\s*Комментарии:\\s*(?P<commentcount>\\d+)\\s*</a>', html)
+        return int(m.group('commentcount')) if m is not None else 0
+
+    def _real_extract(self, url):
+        """Build the info dict for the video addressed by url.
+
+        Posts a ``da.content.get`` call to the digitalaccess.ru JSON API,
+        then downloads the watch page for the description and comment count.
+
+        Returns the info dict, or None (after a warning) when the API lists
+        no file in one of the known formats.
+        Raises ExtractorError when the API response carries an error.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+
+        api_url = 'http://api.digitalaccess.ru/api/json/'
+
+        data = {
+            u'method': u'da.content.get',
+            u'params': [
+                video_id,
+                {
+                    u'site': u's183',
+                    u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
+                    u'contentid': video_id,
+                },
+            ],
+        }
+
+        # The request body must be bytes: Python 3's urllib rejects str data.
+        request = compat_urllib_request.Request(
+            api_url, json.dumps(data).encode('utf-8'))
+
+        video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
+        video_json = json.loads(video_json_page)
+
+        if u'error' in video_json:
+            error = video_json[u'error']
+            # .get() so that a differently shaped error object still yields
+            # an ExtractorError instead of a KeyError.
+            if error.get(u'origin') == u'NoRedisValidData':
+                raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error.get(u'message')), expected=True)
+
+        result = video_json[u'result']
+
+        # Keep only the formats we know how to rank, worst quality first.
+        formats = [{'url': x[u'url'],
+                    'format_id': x[u'content_format']
+                    } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
+        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
+
+        if not formats:
+            self._downloader.report_warning(u'No media links available for %s' % video_id)
+            return
+
+        duration = result.get(u'duration')
+        compilation = result.get(u'compilation')
+        title = result[u'title']
+
+        title = '%s - %s' % (compilation, title) if compilation is not None else title
+
+        # Drop previews of unknown size before ranking them; list.index()
+        # would otherwise raise ValueError on the first unlisted format.
+        previews = [p for p in (result.get(u'preview') or [])
+                    if p.get(u'content_format') in self._known_thumbnails]
+        previews.sort(key=lambda p: self._known_thumbnails.index(p[u'content_format']))
+        thumbnail = previews[-1][u'url'] if previews else None
+
+        video_page = self._download_webpage(url, video_id, u'Downloading video page')
+        description = self._extract_description(video_page)
+        comment_count = self._extract_comment_count(video_page)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'comment_count': comment_count,
+            'formats': formats,
+        }
+
+
+class IviCompilationIE(InfoExtractor):
+    """Playlist extractor for ivi.ru compilation pages and season pages."""
+    IE_DESC = u'ivi.ru compilations'
+    IE_NAME = u'ivi:compilation'
+    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+
+    def _extract_entries(self, html, compilation_id):
+        """Return one 'Ivi' url_result per episode link found in html."""
+        episode_re = r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id
+        entries = []
+        for serie in re.findall(episode_re, html):
+            episode_url = 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie)
+            entries.append(self.url_result(episode_url, 'Ivi'))
+        return entries
+
+    def _real_extract(self, url):
+        """Return a playlist of the episodes of a compilation or of one season."""
+        match = re.match(self._VALID_URL, url)
+        compilation_id = match.group('compilationid')
+        season_id = match.group('seasonid')
+
+        if season_id is None:
+            # Compilation link: the page either lists seasons or episodes.
+            page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
+            playlist_id = compilation_id
+            playlist_title = self._html_search_meta(u'title', page, u'title')
+            season_numbers = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, page)
+            if season_numbers:
+                # Visit every season page and concatenate their episodes.
+                entries = []
+                for number in season_numbers:
+                    season_url = 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, number)
+                    season_page = self._download_webpage(
+                        season_url, compilation_id, u'Downloading season %s web page' % number)
+                    entries.extend(self._extract_entries(season_page, compilation_id))
+            else:
+                # No seasons in this compilation
+                entries = self._extract_entries(page, compilation_id)
+        else:
+            # Season link: a single page holds all the episodes.
+            page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
+            playlist_id = '%s/season%s' % (compilation_id, season_id)
+            playlist_title = self._html_search_meta(u'title', page, u'title')
+            entries = self._extract_entries(page, compilation_id)
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
-
- _TESTS = [{
- u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
- u'file': u'165624.mp4',
- u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
- u'info_dict': {
- u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
- },
- },
- {
- u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
- u'file': u'718370.mp3',
- u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
- u'info_dict': {
- u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
- },
- }]
+
+ # No tests, MDR regularly deletes its videos
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
def _url_for_embed_code(embed_code):
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+    @classmethod
+    def _build_url_result(cls, embed_code):
+        """Return a url_result for the player URL built from embed_code."""
+        player_url = cls._url_for_embed_code(embed_code)
+        return cls.url_result(player_url, ie=cls.ie_key())
+
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
# encoding: utf-8
+import os.path
import re
import json
import hashlib
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
+ url_basename,
)
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
-
+
+        # Warn (without aborting) when the page carries an "unmoderated"
+        # notice; default=None keeps a missing notice from raising.
+        warning = self._html_search_regex(
+            r'<div class="videoUnModer">(.*?)</div>', video_page,
+            u'warning message', default=None)
+        if warning is not None:
+            self._downloader.report_warning(
+                u'Video %s may not be available; smotri said: %s ' %
+                (video_id, warning))
+
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
- video_title = video_url.rsplit('/', 1)[-1]
+ video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
- if video_description.endswith(END_TEXT):
+ if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
- if video_description.startswith(START_TEXT):
+ if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
- upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
- video_upload_date = (
- (
- upload_date_m.group('year') +
- upload_date_m.group('month') +
- upload_date_m.group('day')
+ if upload_date_str:
+ upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
+ video_upload_date = (
+ (
+ upload_date_m.group('year') +
+ upload_date_m.group('month') +
+ upload_date_m.group('day')
+ )
+ if upload_date_m else None
)
- if upload_date_m else None
- )
+ else:
+ video_upload_date = None
duration_str = self._search_meta(u'duration', video_page)
- duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
- video_duration = (
- (
- (int(duration_m.group('hours')) * 60 * 60) +
- (int(duration_m.group('minutes')) * 60) +
- int(duration_m.group('seconds'))
+ if duration_str:
+ duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
+ video_duration = (
+ (
+ (int(duration_m.group('hours')) * 60 * 60) +
+ (int(duration_m.group('minutes')) * 60) +
+ int(duration_m.group('seconds'))
+ )
+ if duration_m else None
)
- if duration_m else None
- )
+ else:
+ video_duration = None
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
- 'video_duration': video_duration,
+ 'duration': video_duration,
'view_count': video_view_count,
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
"""
_VALID_URL = r'''^(?:https?://)?
- (?:(?:(?:www\.)?soundcloud\.com/
+ (?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
- u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
+ u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}
unsmuggle_url,
)
+
class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?://)?
+ (?:(?:www|(?P<player>player))\.)?
+ vimeo(?P<pro>pro)?\.com/
+ (?:.*?/)?
+ (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
+ (?:videos?/)?
+ (?P<id>[0-9]+)
+ /?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
# page by page until there are no video ids - it means we got
# all of them.
- video_ids = []
+ url_results = []
for pagenum in itertools.count(0):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
break
# Extract video identifiers
- ids_in_page = []
- for entry in response['feed']['entry']:
- ids_in_page.append(entry['id']['$t'].split('/')[-1])
- video_ids.extend(ids_in_page)
+            entries = response['feed']['entry']
+            for entry in entries:
+                title = entry['title']['$t']
+                # The video id is the last path component of the entry id.
+                video_id = entry['id']['$t'].split('/')[-1]
+                url_results.append({
+                    '_type': 'url',
+                    'url': video_id,
+                    'ie_key': 'Youtube',
+                    # Must be the variable, not the literal string 'video_id',
+                    # or every playlist entry ends up with the same bogus id.
+                    'id': video_id,
+                    'title': title,
+                })
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# are no more ids on further pages - no need to query
# again.
- if len(ids_in_page) < self._GDATA_PAGE_SIZE:
+ if len(entries) < self._GDATA_PAGE_SIZE:
break
- url_results = [
- self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip(u'/').split(u'/')[-1]
+
+
+class HEADRequest(compat_urllib_request.Request):
+    """Request subclass whose get_method() always reports HEAD."""
+
+    def get_method(self):
+        return 'HEAD'
-__version__ = '2013.12.17.2'
+__version__ = '2013.12.23.2'