10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.gametrailers import GametrailersIE
31 from .extractor.generic import GenericIE
32 from .extractor.googleplus import GooglePlusIE
33 from .extractor.googlesearch import GoogleSearchIE
34 from .extractor.infoq import InfoQIE
35 from .extractor.metacafe import MetacafeIE
36 from .extractor.mixcloud import MixcloudIE
37 from .extractor.mtv import MTVIE
38 from .extractor.myvideo import MyVideoIE
39 from .extractor.nba import NBAIE
40 from .extractor.statigram import StatigramIE
41 from .extractor.photobucket import PhotobucketIE
42 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
43 from .extractor.stanfordoc import StanfordOpenClassroomIE
44 from .extractor.steam import SteamIE
45 from .extractor.ted import TEDIE
46 from .extractor.vimeo import VimeoIE
47 from .extractor.worldstarhiphop import WorldStarHipHopIE
48 from .extractor.xnxx import XNXXIE
49 from .extractor.xvideos import XVideosIE
50 from .extractor.yahoo import YahooIE, YahooSearchIE
51 from .extractor.youku import YoukuIE
52 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
53 from .extractor.zdf import ZDFIE
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # NOTE(review): this extract is missing several original lines
    # (None-guards, `break`/`else`, the `info`/`return` dict literals and the
    # tail of _VALID_URL) — confirm every gap against the full source file.
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
    # Verbose regex: matches /<user>/b/<videoid> and /<user>/c/<chapterid>
    # (closing triple-quote of the raw string is not present in this extract).
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
        (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
    _JUSTIN_PAGE_LIMIT = 100  # page size used by the paged archive API
    IE_NAME = u'justin.tv'

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        """Fetch one JSON page of clips and convert each clip to an info dict."""
        webpage = self._download_webpage(url, video_id,
            u'Downloading video info JSON',
            u'unable to download video info JSON')
        response = json.loads(webpage)
        # The API returns a dict (not a list) on error, with an 'error' key.
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        for clip in response:
            video_url = clip['video_file_url']
            # NOTE(review): an `if video_url:` guard and the `info.append({...})`
            # literal appear to be missing from this extract.
            video_extension = os.path.splitext(video_url)[1][1:]
            # start_time is ISO-like; keep YYYYMMDD by stripping the dashes.
            video_date = re.sub('-', '', clip['start_time'][:10])
            video_uploader_id = clip.get('user_id', clip.get('channel_id'))
            video_id = clip['id']
            video_title = clip.get('title', video_id)
            'title': video_title,
            'uploader': clip.get('channel_name', video_uploader_id),
            'uploader_id': video_uploader_id,
            'upload_date': video_date,
            'ext': video_extension,
        return (len(response), info)

    def _real_extract(self, url):
        """Dispatch on URL type: channel archive, broadcast, or chapter."""
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'invalid URL: %s' % url)
        api_base = 'http://api.justin.tv'
        if mobj.group('channelid'):
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            # NOTE(review): the `if m is None:` guard line is missing here.
            raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)
            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                note=u'Downloading chapter information',
                errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            # Find the <archive> whose <id> matches the page's archive_id.
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
            # NOTE(review): the loop's `break` and the `else:` clause appear
            # to be missing from this extract.
            raise ExtractorError(u'Could not find chapter in chapter information')
            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'
            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                note='Downloading chapter metadata',
                errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)
            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)
            # TODO determine start (and probably fix up file)
            # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
            # NOTE(review): the `info = {` opening of this dict literal and its
            # closing/`return` are missing from this extract.
            'id': u'c' + chapter_id,
            'title': chapter_info['title'],
            'thumbnail': chapter_info['preview'],
            'description': chapter_info['description'],
            'uploader': chapter_info['channel']['display_name'],
            'uploader_id': chapter_info['channel']['name'],
        # NOTE(review): the `else:` introducing the broadcast branch is missing.
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id
        self.report_extraction(video_id)
        limit = self._JUSTIN_PAGE_LIMIT
        # NOTE(review): the paging loop header and `info = []`/offset setup
        # are missing from this extract.
            self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            # A short page means we reached the end of the archive.
            if not paged or page_count != limit:
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com video pages."""
    # NOTE(review): this extract is missing lines (the None-guard before the
    # raise, and the return dict around 'description') — confirm against the
    # full source file.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'invalid URL: %s' % url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
            webpage, u'video URL', flags=re.DOTALL)
        # Try the player heading first, then fall back to the <title> tag.
        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False, flags=re.DOTALL)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'description': video_description,
class UstreamIE(InfoExtractor):
    """Information extractor for recorded ustream.tv videos."""
    # NOTE(review): lines are missing in this extract (the tail of the title
    # regex call and the return dict opening) — confirm against full source.
    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')
        # Direct CDN URL built from the recorded-video id.
        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)
        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
            webpage, u'thumbnail', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'uploader': uploader,
        'thumbnail': thumbnail,
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com shows."""
    # NOTE(review): this extract is missing the `try:` header before the
    # json.loads call and the return-dict opening — confirm against source.
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')
        webpage = self._download_webpage(url, video_id)
        # The show metadata is embedded as JS: `gon.show=<json>;`
        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
            webpage, u'json data', flags=re.MULTILINE)
        # NOTE(review): the `try:` line guarding this parse is missing here.
            data = json.loads(json_data)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))
        # Append a bitrate query parameter to the Akamai stream URL.
        video_url = data['akamai_url'] + '&cbr=256'
        url_parts = compat_urllib_parse_urlparse(video_url)
        video_ext = url_parts.path.rpartition('.')[2]
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': data['title'],
        'description': data.get('teaser_text'),
        'location': data.get('country_of_origin'),
        'uploader': data.get('host', {}).get('name'),
        'uploader_id': data.get('host', {}).get('slug'),
        'thumbnail': data.get('image', {}).get('large_url_2x'),
        'duration': data.get('duration'),
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    # NOTE(review): this extract is missing many lines (guards, `try:`/loop
    # headers, dict literals, format-selection bodies) — confirm each marked
    # gap against the full source file.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for format in formats:
            print(u'%s\t\t%s' % (format['ext'], format['format']))

    def _specific(self, req_format, formats):
        # NOTE(review): the `for x in formats:` header and the return lines
        # are missing from this extract.
            if(x["format"]==req_format):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')
        # Age gate is bypassed with a cookie on the request.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        # NOTE(review): the `try:` line guarding this parse is missing here.
        params = json.loads(json_params)
        raise ExtractorError(u'Invalid JSON')
        self.report_extraction(video_id)
        # NOTE(review): another `try:` header is missing around this metadata
        # access block (the KeyError handler below belongs to it).
        video_title = params['title']
        upload_date = unified_strdate(params['release_date_f'])
        video_description = params['description']
        video_uploader = params['submitted_by']
        thumbnail = params['thumbnails'][0]['image']
        raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()
        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        # NOTE(review): the `if not links:` guard line is missing here.
        raise ExtractorError(u'ERROR: no known formats available for video')
        self.to_screen(u'Links found: %d' % len(links))
        # NOTE(review): the `for link in links:` loop header is missing here.
        # A link looks like this:
        # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
        # A path looks like this:
        # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
        video_url = unescapeHTML( link )
        path = compat_urllib_parse_urlparse( video_url ).path
        extension = os.path.splitext( path )[1][1:]
        # Path segment 4 encodes resolution and bitrate, e.g. 480p_370k_...
        format = path.split('/')[4].split('_')[:2]
        format = "-".join( format )
        # title = u'%s-%s-%s' % (video_title, size, bitrate)
        # NOTE(review): the `formats.append({` literal opening is missing here.
        'uploader': video_uploader,
        'upload_date': upload_date,
        'title': video_title,
        'thumbnail': thumbnail,
        'description': video_description
        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)
        # NOTE(review): the bodies of these branches (returns) are missing.
        if req_format is None or req_format == 'best':
        elif req_format == 'worst':
        elif req_format in ('-1', 'all'):
            format = self._specific( req_format, formats )
        # NOTE(review): the `if result is None:` guard line is missing here.
        raise ExtractorError(u'Requested format not available')
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    # NOTE(review): this extract is missing lines (None-guard, the tail of the
    # info dict and return) — confirm against the full source file.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')
        video_title = mobj.group('title')
        # Get webpage content
        webpage = self._download_webpage(url, video_id)
        # The flv URL is percent-encoded inside the player config.
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
        video_url = compat_urllib_parse.unquote(video_url)
        #Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
        if upload_date: upload_date = unified_strdate(upload_date)
        info = {'id': video_id,
                # NOTE(review): intermediate dict entries are missing here.
                'upload_date': upload_date,
                'title': video_title,
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    # NOTE(review): this extract is missing lines (None-guards and parts of
    # the info dict) — confirm against the full source file.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')
        # Get webpage content
        webpage = self._download_webpage(url, video_id)
        # Get the video title
        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()
        # The real media info lives on a separate embed page.
        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        # NOTE(review): the `if result is None:` guard line is missing here.
        raise ExtractorError(u'ERROR: unable to extract embed page')
        embed_page_url = result.group(0).strip()
        video_id = result.group('videoid')
        webpage = self._download_webpage(embed_page_url, video_id)
        video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')
        info = {'id': video_id,
                # NOTE(review): the 'url'/'ext' entries appear to be missing.
                'title': video_title,
                'player_url': embed_page_url}
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes (playlists of songs)."""
    # NOTE(review): this extract is missing lines (None-guard, mix_id/next_url
    # setup, the per-track dict opening, and the final return) — confirm
    # against the full source file.
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)
        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
        data = json.loads(json_like)
        # Random session id used by the play API.
        session = str(random.randint(0, 1000000000))
        # NOTE(review): the `mix_id = data['id']` line is missing here.
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        # NOTE(review): `next_url = first_url` and the result-list setup are
        # missing from this extract.
        for i in itertools.count():
            api_json = self._download_webpage(next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            # NOTE(review): the `info = {` literal opening is missing here.
            'id': track_data['id'],
            'url': track_data['track_file_stream_url'],
            'title': track_data['performer'] + u' - ' + track_data['name'],
            'raw_title': track_data['name'],
            'uploader_id': data['user']['login'],
            # Stop once the API signals the last track of the set.
            if api_data['set']['at_last_track']:
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
class KeekIE(InfoExtractor):
    """Information extractor for keek.com short videos."""
    # NOTE(review): this extract is missing lines (the tail of the title
    # regex call and the return-dict opening) — confirm against full source.
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')
        # Media and thumbnail URLs are derived directly from the video id.
        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            webpage, u'uploader', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'thumbnail': thumbnail,
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de (metadata comes from an XML API)."""
    # NOTE(review): this extract is missing lines (the id-fallback guard,
    # a couple of `if ... is None` bodies, and the return dict) — confirm
    # against the full source file.
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        # NOTE(review): the `if not video_id:` guard line is missing here.
            _, video_id = os.path.split(url_parent_path)
        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]
        title_el = metadata.find('title')
        # NOTE(review): the `if title_el is None:` guard line is missing here.
        raise ExtractorError(u'Unable to extract title')
        title = title_el.text
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            # NOTE(review): the fallback assignment (`format = ext`) and the
            # `else:` line are missing from this extract.
        format = format_id_el.text
        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        # NOTE(review): the `else: description = None` fallback is missing.
        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        # NOTE(review): the enclosing info-dict literal is missing here.
        'thumbnail': thumbnail,
        'description': description
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de videos (metadata via a flash XML)."""
    # NOTE(review): this extract is missing lines (the tail of the title
    # regex call, the `last_type` selection from the XML, and the return
    # dict) — confirm against the full source file.
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')
        webpage = self._download_webpage(url, video_id)
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')
        idoc = xml.etree.ElementTree.fromstring(xml_code)
        # NOTE(review): the line selecting `last_type` from idoc is missing.
        filename = last_type.findall('./filename')[0].text
        duration = float(last_type.findall('./duration')[0].text)
        video_url = 'http://video2.spiegel.de/flash/' + filename
        video_ext = filename.rpartition('.')[2]
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'duration': duration,
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com view pages."""
    # NOTE(review): this extract is missing lines (None-guard and the
    # return-dict opening) — confirm against the full source file.
    _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')
        # Strip the site-name prefix LiveLeak puts in og:title.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()
        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)
        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'description': video_description,
        'uploader': video_uploader
class TumblrIE(InfoExtractor):
    """Information extractor for video posts on *.tumblr.com blogs."""
    # NOTE(review): this extract is missing lines (the `if not video:` guard
    # and part of the returned dict) — confirm against the full source file.
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        m_url = re.match(self._VALID_URL, url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')
        # Normalize to the canonical post URL before downloading.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)
        # The player markup is JS-escaped (\x22 == double quote).
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video = re.search(re_video, webpage)
        # NOTE(review): the `if video is None:` guard line is missing here.
        raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')
        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)  # We pick the first poster
        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)
        return [{'id': video_id,
                 # NOTE(review): the 'url'/'ext' entries appear to be missing.
                 'title': video_title,
                 'thumbnail': video_thumbnail,
class BandcampIE(InfoExtractor):
    """Information extractor for free Bandcamp track downloads."""
    # NOTE(review): this extract is missing lines (parts of the returned
    # track_info dict and the final return) — confirm against full source.
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)
        # Track id is embedded in the TralbumData JS object.
        id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                       webpage, re.MULTILINE|re.DOTALL).group('id')
        download_webpage = self._download_webpage(download_link, id,
                                                  'Downloading free downloads page')
        # We get the dictionary of the track from some javascrip code
        info = re.search(r'items: (.*?),$',
                         download_webpage, re.MULTILINE).group(1)
        info = json.loads(info)[0]
        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        #We build the url we will use to get the final track url
        # This url is build in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        #in the "download_url" key
        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
        track_info = {'id':id,
                      'title' : info[u'title'],
                      # NOTE(review): the 'ext'/'url' entries appear missing.
                      'thumbnail' : info[u'thumb_url'],
                      'uploader' : info[u'artist']
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    # NOTE(review): this extract is missing lines (None-guard, tail of the
    # title regex call, return dict) — confirm against the full source file.
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
        video_extension = 'mp4'  # the site serves a single mp4 source
        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)
        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
            webpage, u'video URL')
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
        # NOTE(review): the enclosing info-dict literal is missing here.
        'ext': video_extension,
        'title': video_title,
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    # NOTE(review): this extract is missing lines (tail of the title regex
    # call and the return dict) — confirm against the full source file.
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Metadata comes from the player's MRSS feed, not the HTML page.
        mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
        video_extension = 'mp4'
        webpage = self._download_webpage(mrss_url, video_id)
        self.report_extraction(video_id)
        video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            webpage, u'video URL')
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
        # NOTE(review): the enclosing info-dict literal is missing here.
        'ext': video_extension,
        'title': video_title,
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    # NOTE(review): this extract is missing lines (tail of the title regex
    # call and the return-dict opening) — confirm against the full source.
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Rebuild a canonical page URL from the id.
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
        self.report_extraction(video_id)
        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            webpage, u'video URL')
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            webpage, u'description', fatal=False)
        thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
            webpage, u'thumbnail', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'description': video_description,
        'thumbnail': thumbnail,
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    # NOTE(review): this extract is missing lines (tail of the title regex
    # call and the return-dict opening) — confirm against the full source.
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
        self.report_extraction(video_id)
        # The raw stream URL is exposed via a twitter:player meta tag.
        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
            webpage, u'video URL')
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            webpage, u'thumbnail', fatal=False)
        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'thumbnail': thumbnail,
        'uploader': uploader,
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    # NOTE(review): this extract is missing lines (None-guard and the
    # return-dict opening) — confirm against the full source file.
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
        # The per-photo secret is needed for the video XML endpoints below.
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
            first_xml, u'node_id')
        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
        self.report_extraction(video_id)
        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Unable to extract video url')
        # Final URL = APP prefix + HTML-unescaped FULLPATH.
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
        video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'video title')
        video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'description', fatal=False)
        thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'thumbnail', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'description': video_description,
        'thumbnail': thumbnail,
        'uploader_id': video_uploader_id,
class TeamcocoIE(InfoExtractor):
    """Information extractor for teamcoco.com videos."""
    # NOTE(review): this extract is missing lines (None-guard, tails of two
    # regex calls, the return dict) — confirm against the full source file.
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)
        # The numeric id is read off the page, not from the URL slug.
        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
            webpage, u'video id')
        self.report_extraction(video_id)
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
            webpage, u'thumbnail', fatal=False)
        video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
            webpage, u'description', fatal=False)
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
        # NOTE(review): the remaining arguments of this call are missing here.
        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
        # NOTE(review): the enclosing info-dict literal is missing here.
        'title': video_title,
        'thumbnail': thumbnail,
        'description': video_description,
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    # NOTE(review): this extract is missing lines (None-guard, `else:` of the
    # server check, tail of the title regex, the return-dict opening) —
    # confirm against the full source file.
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Canonical movie URL rebuilt from the id.
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(mrss_url, video_id)
        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Unable to extract media URL')
        if len(mobj.group('server')) == 0:
            # Empty server means 'file' is already a full (urlencoded) URL.
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        # NOTE(review): the `else:` line is missing here.
            video_url = mobj.group('server')+'/key='+mobj.group('file')
        video_extension = video_url.split('.')[-1]
        # NOTE(review): the remaining arguments of this call are missing here.
        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
        # Can't see the description anywhere in the UI
        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
        # webpage, u'description', fatal=False)
        # if video_description: video_description = unescapeHTML(video_description)
        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        # NOTE(review): the `if mobj:` guard line is missing here.
            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
        # NOTE(review): the `else:` line is missing here.
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')
        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')
        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)
        # NOTE(review): the enclosing info-dict literal is missing here.
        'ext': video_extension,
        'title': video_title,
        # 'description': video_description,
        'upload_date': video_upload_date,
        'uploader_id': video_uploader_id,
        'thumbnail': video_thumbnail
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""
    # NOTE(review): this extract is missing lines (None-guard, two `try:`
    # headers, the `key` extraction, and the return) — confirm against the
    # full source file.
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)
        # 'ax'/'ts' query parameters are required by the site.
        data = { 'ax': 1, 'ts': time.time() }
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The session cookie must be replayed on the /serve request below.
        cookie = urlh.headers.get('Set-Cookie', '')
        self.report_extraction(track_id)
        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        # NOTE(review): the `try:` line guarding this parse is missing here.
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        raise ExtractorError(u'Hypemachine contained invalid JSON.')
        # NOTE(review): the `key = track[u"key"]` line appears to be missing.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]
        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        # NOTE(review): the `try:` line guarding this parse is missing here.
            song_data = json.loads(song_data_json)
        raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    # NOTE(review): this extract is missing lines (None-guard and the
    # return-dict opening) — confirm against the full source file.
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard line is missing here.
        raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)
        # The play page JS-redirects; follow the scripted location manually.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
        title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()
        # Media URL comes from a form-encoded POST to magare.do.
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        # Response body is `url=<final>&thumb=<thumbnail>`.
        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
        # NOTE(review): the enclosing info-dict literal is missing here.
        'thumbnail': thumbnail_url,
def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    # NOTE(review): the docstring closing, the `return [` opening and most of
    # the extractor list entries are missing from this extract — only three
    # sampled entries remain below; confirm against the full source file.
        YoutubePlaylistIE(),
        StanfordOpenClassroomIE(),
        WorldStarHipHopIE(),
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
    # Extractor classes follow the <Name>IE naming convention; resolve the
    # class from this module's namespace by appending the suffix.
    class_name = '%sIE' % ie_name
    return globals()[class_name]