youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .tnaflix import TNAFlixNetworkEmbedIE
  51 from .vimeo import VimeoIE
  52 from .dailymotion import DailymotionCloudIE
  53 from .onionstudios import OnionStudiosIE
  54 from .snagfilms import SnagFilmsEmbedIE
  55 from .screenwavemedia import ScreenwaveMediaIE
  56 from .mtv import MTVServicesEmbeddedIE
  57 from .pladform import PladformIE
  58 from .videomore import VideomoreIE
  59 from .googledrive import GoogleDriveIE
  60 from .jwplatform import JWPlatformIE
  61 from .digiteka import DigitekaIE
  62
  63
  64 class GenericIE(InfoExtractor):
  65     IE_DESC = 'Generic downloader that works on some sites'
  66     _VALID_URL = r'.*'
  67     IE_NAME = 'generic'
  68     _TESTS = [
  69         # Direct link to a video
  70         {
  71             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  72             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  73             'info_dict': {
  74                 'id': 'trailer',
  75                 'ext': 'mp4',
  76                 'title': 'trailer',
  77                 'upload_date': '20100513',
  78             }
  79         },
  80         # Direct link to media delivered compressed (until Accept-Encoding is *)
  81         {
  82             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  83             'md5': '128c42e68b13950268b648275386fc74',
  84             'info_dict': {
  85                 'id': 'FictionJunction-Parallel_Hearts',
  86                 'ext': 'flac',
  87                 'title': 'FictionJunction-Parallel_Hearts',
  88                 'upload_date': '20140522',
  89             },
  90             'expected_warnings': [
  91                 'URL could be a direct video link, returning it as such.'
  92             ]
  93         },
  94         # Direct download with broken HEAD
  95         {
  96             'url': 'http://ai-radio.org:8000/radio.opus',
  97             'info_dict': {
  98                 'id': 'radio',
  99                 'ext': 'opus',
 100                 'title': 'radio',
 101             },
 102             'params': {
 103                 'skip_download': True,  # infinite live stream
 104             },
 105             'expected_warnings': [
 106                 r'501.*Not Implemented'
 107             ],
 108         },
 109         # Direct link with incorrect MIME type
 110         {
 111             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 112             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 113             'info_dict': {
 114                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 115                 'id': '5_Lennart_Poettering_-_Systemd',
 116                 'ext': 'webm',
 117                 'title': '5_Lennart_Poettering_-_Systemd',
 118                 'upload_date': '20141120',
 119             },
 120             'expected_warnings': [
 121                 'URL could be a direct video link, returning it as such.'
 122             ]
 123         },
 124         # RSS feed
 125         {
 126             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 127             'info_dict': {
 128                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 129                 'title': 'Zero Punctuation',
 130                 'description': 're:.*groundbreaking video review series.*'
 131             },
 132             'playlist_mincount': 11,
 133         },
 134         # RSS feed with enclosure
 135         {
 136             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 137             'info_dict': {
 138                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 139                 'ext': 'm4v',
 140                 'upload_date': '20150228',
 141                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 142             }
 143         },
 144         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 145         {
 146             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 147             'info_dict': {
 148                 'id': 'smil',
 149                 'ext': 'mp4',
 150                 'title': 'Automatics, robotics and biocybernetics',
 151                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 152                 'upload_date': '20130627',
 153                 'formats': 'mincount:16',
 154                 'subtitles': 'mincount:1',
 155             },
 156             'params': {
 157                 'force_generic_extractor': True,
 158                 'skip_download': True,
 159             },
 160         },
 161         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 162         {
 163             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 164             'info_dict': {
 165                 'id': 'hds',
 166                 'ext': 'flv',
 167                 'title': 'hds',
 168                 'formats': 'mincount:1',
 169             },
 170             'params': {
 171                 'skip_download': True,
 172             },
 173         },
 174         # SMIL from https://www.restudy.dk/video/play/id/1637
 175         {
 176             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 177             'info_dict': {
 178                 'id': 'video_1637',
 179                 'ext': 'flv',
 180                 'title': 'video_1637',
 181                 'formats': 'mincount:3',
 182             },
 183             'params': {
 184                 'skip_download': True,
 185             },
 186         },
 187         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 188         {
 189             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 190             'info_dict': {
 191                 'id': 'smil-service',
 192                 'ext': 'flv',
 193                 'title': 'smil-service',
 194                 'formats': 'mincount:1',
 195             },
 196             'params': {
 197                 'skip_download': True,
 198             },
 199         },
 200         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 201         {
 202             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 203             'info_dict': {
 204                 'id': '4719370',
 205                 'ext': 'mp4',
 206                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 207                 'formats': 'mincount:3',
 208             },
 209             'params': {
 210                 'skip_download': True,
 211             },
 212         },
 213         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 214         {
 215             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 216             'info_dict': {
 217                 'id': 'mZlp2ctYIUEB',
 218                 'ext': 'mp4',
 219                 'title': 'Tikibad ontruimd wegens brand',
 220                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 221                 'thumbnail': 're:^https?://.*\.jpg$',
 222                 'duration': 33,
 223             },
 224             'params': {
 225                 'skip_download': True,
 226             },
 227         },
 228         # MPD from http://dash-mse-test.appspot.com/media.html
 229         {
 230             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 231             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 232             'info_dict': {
 233                 'id': 'car-20120827-manifest',
 234                 'ext': 'mp4',
 235                 'title': 'car-20120827-manifest',
 236                 'formats': 'mincount:9',
 237             },
 238             'params': {
 239                 'format': 'bestvideo',
 240             },
 241         },
 242         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 243         {
 244             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 245             'info_dict': {
 246                 'id': 'content',
 247                 'ext': 'mp4',
 248                 'title': 'content',
 249                 'formats': 'mincount:8',
 250             },
 251             'params': {
 252                 # m3u8 downloads
 253                 'skip_download': True,
 254             }
 255         },
 256         # google redirect
 257         {
 258             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 259             'info_dict': {
 260                 'id': 'cmQHVoWB5FY',
 261                 'ext': 'mp4',
 262                 'upload_date': '20130224',
 263                 'uploader_id': 'TheVerge',
 264                 'description': 're:^Chris Ziegler takes a look at the\.*',
 265                 'uploader': 'The Verge',
 266                 'title': 'First Firefox OS phones side-by-side',
 267             },
 268             'params': {
 269                 'skip_download': False,
 270             }
 271         },
 272         {
 273             # redirect in Refresh HTTP header
 274             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 275             'info_dict': {
 276                 'id': 'pO8h3EaFRdo',
 277                 'ext': 'mp4',
 278                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 279                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 280                 'upload_date': '20150917',
 281                 'uploader_id': 'brtvofficial',
 282                 'uploader': 'Boiler Room',
 283             },
 284             'params': {
 285                 'skip_download': False,
 286             },
 287         },
 288         {
 289             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 290             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 291             'info_dict': {
 292                 'id': '13601338388002',
 293                 'ext': 'mp4',
 294                 'uploader': 'www.hodiho.fr',
 295                 'title': 'R\u00e9gis plante sa Jeep',
 296             }
 297         },
 298         # bandcamp page with custom domain
 299         {
 300             'add_ie': ['Bandcamp'],
 301             'url': 'http://bronyrock.com/track/the-pony-mash',
 302             'info_dict': {
 303                 'id': '3235767654',
 304                 'ext': 'mp3',
 305                 'title': 'The Pony Mash',
 306                 'uploader': 'M_Pallante',
 307             },
 308             'skip': 'There is a limit of 200 free downloads / month for the test song',
 309         },
 310         # embedded brightcove video
 311         # it also tests brightcove videos that need to set the 'Referer' in the
 312         # http requests
 313         {
 314             'add_ie': ['BrightcoveLegacy'],
 315             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 316             'info_dict': {
 317                 'id': '2765128793001',
 318                 'ext': 'mp4',
 319                 'title': 'Le cours de bourse : l’analyse technique',
 320                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 321                 'uploader': 'BFM BUSINESS',
 322             },
 323             'params': {
 324                 'skip_download': True,
 325             },
 326         },
 327         {
 328             # https://github.com/rg3/youtube-dl/issues/2253
 329             'url': 'http://bcove.me/i6nfkrc3',
 330             'md5': '0ba9446db037002366bab3b3eb30c88c',
 331             'info_dict': {
 332                 'id': '3101154703001',
 333                 'ext': 'mp4',
 334                 'title': 'Still no power',
 335                 'uploader': 'thestar.com',
 336                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 337             },
 338             'add_ie': ['BrightcoveLegacy'],
 339         },
 340         {
 341             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 342             'md5': 'fb973ecf6e4a78a67453647444222983',
 343             'info_dict': {
 344                 'id': '3414141473001',
 345                 'ext': 'mp4',
 346                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 347                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 348                 'uploader': 'Championat',
 349             },
 350         },
 351         {
 352             # https://github.com/rg3/youtube-dl/issues/3541
 353             'add_ie': ['BrightcoveLegacy'],
 354             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 355             'info_dict': {
 356                 'id': '3866516442001',
 357                 'ext': 'mp4',
 358                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 359                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 360                 'uploader': 'SBS Broadcasting',
 361             },
 362             'skip': 'Restricted to Netherlands',
 363             'params': {
 364                 'skip_download': True,  # m3u8 download
 365             },
 366         },
 367         # ooyala video
 368         {
 369             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 370             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 371             'info_dict': {
 372                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 373                 'ext': 'mp4',
 374                 'title': '2cc213299525360.mov',  # that's what we get
 375                 'duration': 238.231,
 376             },
 377             'add_ie': ['Ooyala'],
 378         },
 379         {
 380             # ooyala video embedded with http://player.ooyala.com/iframe.js
 381             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 382             'info_dict': {
 383                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 384                 'ext': 'mp4',
 385                 'title': '"Steve Jobs: Man in the Machine" trailer',
 386                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 387                 'duration': 135.427,
 388             },
 389             'params': {
 390                 'skip_download': True,
 391             },
 392         },
 393         # multiple ooyala embeds on SBN network websites
 394         {
 395             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 396             'info_dict': {
 397                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 398                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 399             },
 400             'playlist_mincount': 3,
 401             'params': {
 402                 'skip_download': True,
 403             },
 404             'add_ie': ['Ooyala'],
 405         },
 406         # embed.ly video
 407         {
 408             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 409             'info_dict': {
 410                 'id': '9ODmcdjQcHQ',
 411                 'ext': 'mp4',
 412                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 413                 'upload_date': '20140225',
 414                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 415                 'uploader': 'Tested',
 416                 'uploader_id': 'testedcom',
 417             },
 418             # No need to test YoutubeIE here
 419             'params': {
 420                 'skip_download': True,
 421             },
 422         },
 423         # funnyordie embed
 424         {
 425             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 426             'info_dict': {
 427                 'id': '18e820ec3f',
 428                 'ext': 'mp4',
 429                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 430                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 431             },
 432         },
 433         # RUTV embed
 434         {
 435             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 436             'info_dict': {
 437                 'id': '776940',
 438                 'ext': 'mp4',
 439                 'title': 'Охотское море стало целиком российским',
 440                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 441             },
 442             'params': {
 443                 # m3u8 download
 444                 'skip_download': True,
 445             },
 446         },
 447         # TVC embed
 448         {
 449             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 450             'info_dict': {
 451                 'id': '55304',
 452                 'ext': 'mp4',
 453                 'title': 'Дошкольное воспитание',
 454             },
 455         },
 456         # SportBox embed
 457         {
 458             'url': 'http://www.vestifinance.ru/articles/25753',
 459             'info_dict': {
 460                 'id': '25753',
 461                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 462             },
 463             'playlist': [{
 464                 'info_dict': {
 465                     'id': '370908',
 466                     'title': 'Госзаказ. День 3',
 467                     'ext': 'mp4',
 468                 }
 469             }, {
 470                 'info_dict': {
 471                     'id': '370905',
 472                     'title': 'Госзаказ. День 2',
 473                     'ext': 'mp4',
 474                 }
 475             }, {
 476                 'info_dict': {
 477                     'id': '370902',
 478                     'title': 'Госзаказ. День 1',
 479                     'ext': 'mp4',
 480                 }
 481             }],
 482             'params': {
 483                 # m3u8 download
 484                 'skip_download': True,
 485             },
 486         },
 487         # Myvi.ru embed
 488         {
 489             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 490             'info_dict': {
 491                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 492                 'ext': 'mp4',
 493                 'title': 'Ужастики, русский трейлер (2015)',
 494                 'thumbnail': 're:^https?://.*\.jpg$',
 495                 'duration': 153,
 496             }
 497         },
 498         # XHamster embed
 499         {
 500             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 501             'info_dict': {
 502                 'id': 'showthread',
 503                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 504             },
 505             'playlist_mincount': 7,
 506         },
 507         # Embedded TED video
 508         {
 509             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 510             'md5': '65fdff94098e4a607385a60c5177c638',
 511             'info_dict': {
 512                 'id': '1969',
 513                 'ext': 'mp4',
 514                 'title': 'Hidden miracles of the natural world',
 515                 'uploader': 'Louie Schwartzberg',
 516                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 517             }
 518         },
 519         # Embedded Ustream video
 520         {
 521             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 522             'md5': '27b99cdb639c9b12a79bca876a073417',
 523             'info_dict': {
 524                 'id': '45734260',
 525                 'ext': 'flv',
 526                 'uploader': 'AU SPA:  The NSA and Privacy',
 527                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 528             }
 529         },
 530         # nowvideo embed hidden behind percent encoding
 531         {
 532             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 533             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 534             'info_dict': {
 535                 'id': '06e53103ca9aa',
 536                 'ext': 'flv',
 537                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 538                 'description': 'No description',
 539             },
 540         },
 541         # arte embed
 542         {
 543             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 544             'md5': '7653032cbb25bf6c80d80f217055fa43',
 545             'info_dict': {
 546                 'id': '048195-004_PLUS7-F',
 547                 'ext': 'flv',
 548                 'title': 'X:enius',
 549                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 550                 'upload_date': '20140320',
 551             },
 552             'params': {
 553                 'skip_download': 'Requires rtmpdump'
 554             }
 555         },
 556         # francetv embed
 557         {
 558             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 559             'info_dict': {
 560                 'id': 'EV_30231',
 561                 'ext': 'mp4',
 562                 'title': 'Alcaline, le concert avec Calogero',
 563                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 564                 'upload_date': '20150226',
 565                 'timestamp': 1424989860,
 566                 'duration': 5400,
 567             },
 568             'params': {
 569                 # m3u8 downloads
 570                 'skip_download': True,
 571             },
 572             'expected_warnings': [
 573                 'Forbidden'
 574             ]
 575         },
 576         # Condé Nast embed
 577         {
 578             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 579             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 580             'info_dict': {
 581                 'id': '53501be369702d3275860000',
 582                 'ext': 'mp4',
 583                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 584             }
 585         },
 586         # Dailymotion embed
 587         {
 588             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 589             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 590             'info_dict': {
 591                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 592                 'ext': 'mp4',
 593                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 594                 'uploader': 'Spi0n',
 595             },
 596             'add_ie': ['Dailymotion'],
 597         },
 598         # YouTube embed
 599         {
 600             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 601             'info_dict': {
 602                 'id': 'FXRb4ykk4S0',
 603                 'ext': 'mp4',
 604                 'title': 'The NBL Auction 2014',
 605                 'uploader': 'BADMINTON England',
 606                 'uploader_id': 'BADMINTONEvents',
 607                 'upload_date': '20140603',
 608                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 609             },
 610             'add_ie': ['Youtube'],
 611             'params': {
 612                 'skip_download': True,
 613             }
 614         },
 615         # MTVServices embed
 616         {
 617             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 618             'md5': '35727f82f58c76d996fc188f9755b0d5',
 619             'info_dict': {
 620                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 621                 'ext': 'mp4',
 622                 'title': 'Review',
 623                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 624             },
 625         },
 626         # YouTube embed via <data-embed-url="">
 627         {
 628             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 629             'info_dict': {
 630                 'id': '4vAffPZIT44',
 631                 'ext': 'mp4',
 632                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 633                 'uploader': 'Gameloft',
 634                 'uploader_id': 'gameloft',
 635                 'upload_date': '20140828',
 636                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 637             },
 638             'params': {
 639                 'skip_download': True,
 640             }
 641         },
 642         # Camtasia studio
 643         {
 644             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 645             'playlist': [{
 646                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 647                 'info_dict': {
 648                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 649                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 650                     'ext': 'flv',
 651                     'duration': 2235.90,
 652                 }
 653             }, {
 654                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 655                 'info_dict': {
 656                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 657                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 658                     'ext': 'flv',
 659                     'duration': 2235.93,
 660                 }
 661             }],
 662             'info_dict': {
 663                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 664             }
 665         },
 666         # Flowplayer
 667         {
 668             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 669             'md5': '9d65602bf31c6e20014319c7d07fba27',
 670             'info_dict': {
 671                 'id': '5123ea6d5e5a7',
 672                 'ext': 'mp4',
 673                 'age_limit': 18,
 674                 'uploader': 'www.handjobhub.com',
 675                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 676             }
 677         },
 678         # Multiple brightcove videos
 679         # https://github.com/rg3/youtube-dl/issues/2283
 680         {
 681             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 682             'info_dict': {
 683                 'id': 'always-never',
 684                 'title': 'Always / Never - The New Yorker',
 685             },
 686             'playlist_count': 3,
 687             'params': {
 688                 'extract_flat': False,
 689                 'skip_download': True,
 690             }
 691         },
 692         # MLB embed
 693         {
 694             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 695             'md5': '96f09a37e44da40dd083e12d9a683327',
 696             'info_dict': {
 697                 'id': '33322633',
 698                 'ext': 'mp4',
 699                 'title': 'Ump changes call to ball',
 700                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 701                 'duration': 48,
 702                 'timestamp': 1401537900,
 703                 'upload_date': '20140531',
 704                 'thumbnail': 're:^https?://.*\.jpg$',
 705             },
 706         },
 707         # Wistia embed
 708         {
 709             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 710             'md5': '8788b683c777a5cf25621eaf286d0c23',
 711             'info_dict': {
 712                 'id': '1cfaf6b7ea',
 713                 'ext': 'mov',
 714                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 715                 'duration': 643.0,
 716                 'filesize': 182808282,
 717                 'uploader': 'education-portal.com',
 718             },
 719         },
 720         {
 721             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 722             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 723             'info_dict': {
 724                 'id': 'uxjb0lwrcz',
 725                 'ext': 'mp4',
 726                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 727                 'duration': 1715.0,
 728                 'uploader': 'thoughtworks.wistia.com',
 729             },
 730         },
 731         # Soundcloud embed
 732         {
 733             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 734             'info_dict': {
 735                 'id': '174391317',
 736                 'ext': 'mp3',
 737                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 738                 'uploader': 'Sophos Security',
 739                 'title': 'Chet Chat 171 - Oct 29, 2014',
 740                 'upload_date': '20141029',
 741             }
 742         },
 743         # Livestream embed
 744         {
 745             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 746             'info_dict': {
 747                 'id': '67864563',
 748                 'ext': 'flv',
 749                 'upload_date': '20141112',
 750                 'title': 'Rosetta #CometLanding webcast HL 10',
 751             }
 752         },
 753         # LazyYT
 754         {
 755             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 756             'info_dict': {
 757                 'id': '1986',
 758                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 759             },
 760             'playlist_mincount': 2,
 761         },
 762         # Cinchcast embed
 763         {
 764             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 765             'info_dict': {
 766                 'id': '7141703',
 767                 'ext': 'mp3',
 768                 'upload_date': '20141126',
 769                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 770             }
 771         },
 772         # Cinerama player
 773         {
 774             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 775             'info_dict': {
 776                 'id': '730m_DandD_1901_512k',
 777                 'ext': 'mp4',
 778                 'uploader': 'www.abc.net.au',
 779                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 780             }
 781         },
 782         # embedded viddler video
 783         {
 784             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 785             'info_dict': {
 786                 'id': '4d03aad9',
 787                 'ext': 'mp4',
 788                 'uploader': 'deadspin',
 789                 'title': 'WALL-TO-GORTAT',
 790                 'timestamp': 1422285291,
 791                 'upload_date': '20150126',
 792             },
 793             'add_ie': ['Viddler'],
 794         },
 795         # Libsyn embed
 796         {
 797             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 798             'info_dict': {
 799                 'id': '3377616',
 800                 'ext': 'mp3',
 801                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 802                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 803                 'upload_date': '20150220',
 804             },
 805         },
 806         # jwplayer YouTube
 807         {
 808             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 809             'info_dict': {
 810                 'id': 'Mrj4DVp2zeA',
 811                 'ext': 'mp4',
 812                 'upload_date': '20150212',
 813                 'uploader': 'The National Archives UK',
 814                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 815                 'uploader_id': 'NationalArchives08',
 816                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 817             },
 818         },
 819         # rtl.nl embed
 820         {
 821             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 822             'playlist_mincount': 5,
 823             'info_dict': {
 824                 'id': 'aanslagen-kopenhagen',
 825                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 826             }
 827         },
 828         # Zapiks embed
 829         {
 830             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 831             'info_dict': {
 832                 'id': '118046',
 833                 'ext': 'mp4',
 834                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 835             }
 836         },
 837         # Kaltura embed
 838         {
 839             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 840             'info_dict': {
 841                 'id': '1_eergr3h1',
 842                 'ext': 'mp4',
 843                 'upload_date': '20150226',
 844                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 845                 'timestamp': int,
 846                 'title': 'John Carlson Postgame 2/25/15',
 847             },
 848         },
 849         # Kaltura embed (different embed code)
 850         {
 851             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 852             'info_dict': {
 853                 'id': '1_a52wc67y',
 854                 'ext': 'flv',
 855                 'upload_date': '20150127',
 856                 'uploader_id': 'PremierMedia',
 857                 'timestamp': int,
 858                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 859             },
 860         },
 861         # Kaltura embed protected with referrer
 862         {
 863             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 864             'info_dict': {
 865                 'id': '1_g4fbemnq',
 866                 'ext': 'mp4',
 867                 'title': 'Violetta - Achter De Schermen - Ruggero',
 868                 'description': 'Achter de schermen met Ruggero',
 869                 'timestamp': 1435133761,
 870                 'upload_date': '20150624',
 871                 'uploader_id': 'echojecka',
 872             },
 873         },
 874         # Eagle.Platform embed (generic URL)
 875         {
 876             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 877             'info_dict': {
 878                 'id': '227304',
 879                 'ext': 'mp4',
 880                 'title': 'Навальный вышел на свободу',
 881                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 882                 'thumbnail': 're:^https?://.*\.jpg$',
 883                 'duration': 87,
 884                 'view_count': int,
 885                 'age_limit': 0,
 886             },
 887         },
 888         # ClipYou (Eagle.Platform) embed (custom URL)
 889         {
 890             'url': 'http://muz-tv.ru/play/7129/',
 891             'info_dict': {
 892                 'id': '12820',
 893                 'ext': 'mp4',
 894                 'title': "'O Sole Mio",
 895                 'thumbnail': 're:^https?://.*\.jpg$',
 896                 'duration': 216,
 897                 'view_count': int,
 898             },
 899         },
 900         # Pladform embed
 901         {
 902             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 903             'info_dict': {
 904                 'id': '100183293',
 905                 'ext': 'mp4',
 906                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 907                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 908                 'thumbnail': 're:^https?://.*\.jpg$',
 909                 'duration': 694,
 910                 'age_limit': 0,
 911             },
 912         },
 913         # Playwire embed
 914         {
 915             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 916             'info_dict': {
 917                 'id': '3519514',
 918                 'ext': 'mp4',
 919                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 920                 'thumbnail': 're:^https?://.*\.png$',
 921                 'duration': 45.115,
 922             },
 923         },
 924         # 5min embed
 925         {
 926             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 927             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 928             'info_dict': {
 929                 'id': '518726732',
 930                 'ext': 'mp4',
 931                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 932             },
 933         },
 934         # SVT embed
 935         {
 936             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 937             'info_dict': {
 938                 'id': '2900353',
 939                 'ext': 'flv',
 940                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 941                 'duration': 27,
 942                 'age_limit': 0,
 943             },
 944         },
 945         # Crooks and Liars embed
 946         {
 947             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 948             'info_dict': {
 949                 'id': '8RUoRhRi',
 950                 'ext': 'mp4',
 951                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 952                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 953                 'timestamp': 1428207000,
 954                 'upload_date': '20150405',
 955                 'uploader': 'Heather',
 956             },
 957         },
 958         # Crooks and Liars external embed
 959         {
 960             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 961             'info_dict': {
 962                 'id': 'MTE3MjUtMzQ2MzA',
 963                 'ext': 'mp4',
 964                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 965                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 966                 'timestamp': 1265032391,
 967                 'upload_date': '20100201',
 968                 'uploader': 'Heather',
 969             },
 970         },
 971         # NBC Sports vplayer embed
 972         {
 973             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 974             'info_dict': {
 975                 'id': 'ln7x1qSThw4k',
 976                 'ext': 'flv',
 977                 'title': "PFT Live: New leader in the 'new-look' defense",
 978                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 979             },
 980         },
 981         # UDN embed
 982         {
 983             'url': 'http://www.udn.com/news/story/7314/822787',
 984             'md5': 'fd2060e988c326991037b9aff9df21a6',
 985             'info_dict': {
 986                 'id': '300346',
 987                 'ext': 'mp4',
 988                 'title': '中一中男師變性 全校師生力挺',
 989                 'thumbnail': 're:^https?://.*\.jpg$',
 990             }
 991         },
 992         # Ooyala embed
 993         {
 994             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 995             'info_dict': {
 996                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 997                 'ext': 'mp4',
 998                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
 999                 'title': 'This is what separates the Excel masters from the wannabes',
1000                 'duration': 191.933,
1001             },
1002             'params': {
1003                 # m3u8 downloads
1004                 'skip_download': True,
1005             }
1006         },
1007         # Contains a SMIL manifest
1008         {
1009             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1010             'info_dict': {
1011                 'id': 'file',
1012                 'ext': 'flv',
1013                 'title': '+ Football: Lottery Champions League Europe',
1014                 'uploader': 'www.telewebion.com',
1015             },
1016             'params': {
1017                 # rtmpe downloads
1018                 'skip_download': True,
1019             }
1020         },
1021         # Brightcove URL in single quotes
1022         {
1023             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1024             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1025             'info_dict': {
1026                 'id': '4255764656001',
1027                 'ext': 'mp4',
1028                 'title': 'SN Presents: Russell Martin, World Citizen',
1029                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1030                 'uploader': 'Rogers Sportsnet',
1031             },
1032         },
1033         # Dailymotion Cloud video
1034         {
1035             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1036             'md5': '49444254273501a64675a7e68c502681',
1037             'info_dict': {
1038                 'id': '5585de919473990de4bee11b',
1039                 'ext': 'mp4',
1040                 'title': 'Le débat',
1041                 'thumbnail': 're:^https?://.*\.jpe?g$',
1042             }
1043         },
1044         # OnionStudios embed
1045         {
1046             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1047             'info_dict': {
1048                 'id': '2855',
1049                 'ext': 'mp4',
1050                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1051                 'thumbnail': 're:^https?://.*\.jpe?g$',
1052                 'uploader': 'ClickHole',
1053                 'uploader_id': 'clickhole',
1054             }
1055         },
1056         # SnagFilms embed
1057         {
1058             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1059             'info_dict': {
1060                 'id': '74849a00-85a9-11e1-9660-123139220831',
1061                 'ext': 'mp4',
1062                 'title': '#whilewewatch',
1063             }
1064         },
1065         # AdobeTVVideo embed
1066         {
1067             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1068             'md5': '43662b577c018ad707a63766462b1e87',
1069             'info_dict': {
1070                 'id': '2456',
1071                 'ext': 'mp4',
1072                 'title': 'New experience with Acrobat DC',
1073                 'description': 'New experience with Acrobat DC',
1074                 'duration': 248.667,
1075             },
1076         },
1077         # ScreenwaveMedia embed
1078         {
1079             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1080             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1081             'info_dict': {
1082                 'id': 'cinemasnob-55d26273809dd',
1083                 'ext': 'mp4',
1084                 'title': 'cinemasnob',
1085             },
1086         },
1087         # BrightcoveInPageEmbed embed
1088         {
1089             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1090             'info_dict': {
1091                 'id': '4238694884001',
1092                 'ext': 'flv',
1093                 'title': 'Tabletop: Dread, Last Thoughts',
1094                 'description': 'Tabletop: Dread, Last Thoughts',
1095                 'duration': 51690,
1096             },
1097         },
1098         # JWPlayer with M3U8
1099         {
1100             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1101             'info_dict': {
1102                 'id': 'playlist',
1103                 'ext': 'mp4',
1104                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1105                 'uploader': 'ren.tv',
1106             },
1107             'params': {
1108                 # m3u8 downloads
1109                 'skip_download': True,
1110             }
1111         }
1112     ]
1113
1114     def report_following_redirect(self, new_url):
1115         """Report information extraction."""
1116         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1117
1118     def _extract_rss(self, url, video_id, doc):
1119         playlist_title = doc.find('./channel/title').text
1120         playlist_desc_el = doc.find('./channel/description')
1121         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1122
1123         entries = []
1124         for it in doc.findall('./channel/item'):
1125             next_url = xpath_text(it, 'link', fatal=False)
1126             if not next_url:
1127                 enclosure_nodes = it.findall('./enclosure')
1128                 for e in enclosure_nodes:
1129                     next_url = e.attrib.get('url')
1130                     if next_url:
1131                         break
1132
1133             if not next_url:
1134                 continue
1135
1136             entries.append({
1137                 '_type': 'url',
1138                 'url': next_url,
1139                 'title': it.find('title').text,
1140             })
1141
1142         return {
1143             '_type': 'playlist',
1144             'id': url,
1145             'title': playlist_title,
1146             'description': playlist_desc,
1147             'entries': entries,
1148         }
1149
1150     def _extract_camtasia(self, url, video_id, webpage):
1151         """ Returns None if no camtasia video can be found. """
1152
1153         camtasia_cfg = self._search_regex(
1154             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1155             webpage, 'camtasia configuration file', default=None)
1156         if camtasia_cfg is None:
1157             return None
1158
1159         title = self._html_search_meta('DC.title', webpage, fatal=True)
1160
1161         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1162         camtasia_cfg = self._download_xml(
1163             camtasia_url, video_id,
1164             note='Downloading camtasia configuration',
1165             errnote='Failed to download camtasia configuration')
1166         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1167
1168         entries = []
1169         for n in fileset_node.getchildren():
1170             url_n = n.find('./uri')
1171             if url_n is None:
1172                 continue
1173
1174             entries.append({
1175                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1176                 'title': '%s - %s' % (title, n.tag),
1177                 'url': compat_urlparse.urljoin(url, url_n.text),
1178                 'duration': float_or_none(n.find('./duration').text),
1179             })
1180
1181         return {
1182             '_type': 'playlist',
1183             'entries': entries,
1184             'title': title,
1185         }
1186
1187     def _real_extract(self, url):
1188         if url.startswith('//'):
1189             return {
1190                 '_type': 'url',
1191                 'url': self.http_scheme() + url,
1192             }
1193
1194         parsed_url = compat_urlparse.urlparse(url)
1195         if not parsed_url.scheme:
1196             default_search = self._downloader.params.get('default_search')
1197             if default_search is None:
1198                 default_search = 'fixup_error'
1199
1200             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1201                 if '/' in url:
1202                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1203                     return self.url_result('http://' + url)
1204                 elif default_search != 'fixup_error':
1205                     if default_search == 'auto_warning':
1206                         if re.match(r'^(?:url|URL)$', url):
1207                             raise ExtractorError(
1208                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1209                                 expected=True)
1210                         else:
1211                             self._downloader.report_warning(
1212                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1213                     return self.url_result('ytsearch:' + url)
1214
1215             if default_search in ('error', 'fixup_error'):
1216                 raise ExtractorError(
1217                     '%r is not a valid URL. '
1218                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1219                     % (url, url), expected=True)
1220             else:
1221                 if ':' not in default_search:
1222                     default_search += ':'
1223                 return self.url_result(default_search + url)
1224
1225         url, smuggled_data = unsmuggle_url(url)
1226         force_videoid = None
1227         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1228         if smuggled_data and 'force_videoid' in smuggled_data:
1229             force_videoid = smuggled_data['force_videoid']
1230             video_id = force_videoid
1231         else:
1232             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1233
1234         self.to_screen('%s: Requesting header' % video_id)
1235
1236         head_req = HEADRequest(url)
1237         head_response = self._request_webpage(
1238             head_req, video_id,
1239             note=False, errnote='Could not send HEAD request to %s' % url,
1240             fatal=False)
1241
1242         if head_response is not False:
1243             # Check for redirect
1244             new_url = head_response.geturl()
1245             if url != new_url:
1246                 self.report_following_redirect(new_url)
1247                 if force_videoid:
1248                     new_url = smuggle_url(
1249                         new_url, {'force_videoid': force_videoid})
1250                 return self.url_result(new_url)
1251
1252         full_response = None
1253         if head_response is False:
1254             request = sanitized_Request(url)
1255             request.add_header('Accept-Encoding', '*')
1256             full_response = self._request_webpage(request, video_id)
1257             head_response = full_response
1258
1259         info_dict = {
1260             'id': video_id,
1261             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1262             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1263         }
1264
1265         # Check for direct link to a video
1266         content_type = head_response.headers.get('Content-Type', '').lower()
1267         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1268         if m:
1269             format_id = m.group('format_id')
1270             if format_id.endswith('mpegurl'):
1271                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1272             elif format_id == 'f4m':
1273                 formats = self._extract_f4m_formats(url, video_id)
1274             else:
1275                 formats = [{
1276                     'format_id': m.group('format_id'),
1277                     'url': url,
1278                     'vcodec': 'none' if m.group('type') == 'audio' else None
1279                 }]
1280             info_dict.update({
1281                 'direct': True,
1282                 'formats': formats,
1283             })
1284             return info_dict
1285
1286         if not self._downloader.params.get('test', False) and not is_intentional:
1287             force = self._downloader.params.get('force_generic_extractor', False)
1288             self._downloader.report_warning(
1289                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1290
1291         if not full_response:
1292             request = sanitized_Request(url)
1293             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1294             # making it impossible to download only chunk of the file (yet we need only 512kB to
1295             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1296             # that will always result in downloading the whole file that is not desirable.
1297             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1298             # to accept raw bytes and being able to download only a chunk.
1299             # It may probably better to solve this by checking Content-Type for application/octet-stream
1300             # after HEAD request finishes, but not sure if we can rely on this.
1301             request.add_header('Accept-Encoding', '*')
1302             full_response = self._request_webpage(request, video_id)
1303
1304         # Maybe it's a direct link to a video?
1305         # Be careful not to download the whole thing!
1306         first_bytes = full_response.read(512)
1307         if not is_html(first_bytes):
1308             self._downloader.report_warning(
1309                 'URL could be a direct video link, returning it as such.')
1310             info_dict.update({
1311                 'direct': True,
1312                 'url': url,
1313             })
1314             return info_dict
1315
1316         webpage = self._webpage_read_content(
1317             full_response, url, video_id, prefix=first_bytes)
1318
1319         self.report_extraction(video_id)
1320
1321         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1322         try:
1323             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1324             if doc.tag == 'rss':
1325                 return self._extract_rss(url, video_id, doc)
1326             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1327                 return self._parse_smil(doc, url, video_id)
1328             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1329                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1330             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1331                 info_dict['formats'] = self._parse_mpd_formats(
1332                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1333                 return info_dict
1334             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1335                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1336                 return info_dict
1337         except compat_xml_parse_error:
1338             pass
1339
1340         # Is it a Camtasia project?
1341         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1342         if camtasia_res is not None:
1343             return camtasia_res
1344
1345         # Sometimes embedded video player is hidden behind percent encoding
1346         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1347         # Unescaping the whole page allows to handle those cases in a generic way
1348         webpage = compat_urllib_parse_unquote(webpage)
1349
1350         # it's tempting to parse this further, but you would
1351         # have to take into account all the variations like
1352         #   Video Title - Site Name
1353         #   Site Name | Video Title
1354         #   Video Title - Tagline | Site Name
1355         # and so on and so forth; it's just not practical
1356         video_title = self._html_search_regex(
1357             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1358             default='video')
1359
1360         # Try to detect age limit automatically
1361         age_limit = self._rta_search(webpage)
1362         # And then there are the jokers who advertise that they use RTA,
1363         # but actually don't.
1364         AGE_LIMIT_MARKERS = [
1365             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1366         ]
1367         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1368             age_limit = 18
1369
1370         # video uploader is domain name
1371         video_uploader = self._search_regex(
1372             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1373
1374         # Helper method
1375         def _playlist_from_matches(matches, getter=None, ie=None):
1376             urlrs = orderedSet(
1377                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1378                 for m in matches)
1379             return self.playlist_result(
1380                 urlrs, playlist_id=video_id, playlist_title=video_title)
1381
1382         # Look for Brightcove Legacy Studio embeds
1383         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1384         if bc_urls:
1385             self.to_screen('Brightcove video detected.')
1386             entries = [{
1387                 '_type': 'url',
1388                 'url': smuggle_url(bc_url, {'Referer': url}),
1389                 'ie_key': 'BrightcoveLegacy'
1390             } for bc_url in bc_urls]
1391
1392             return {
1393                 '_type': 'playlist',
1394                 'title': video_title,
1395                 'id': video_id,
1396                 'entries': entries,
1397             }
1398
1399         # Look for Brightcove New Studio embeds
1400         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1401         if bc_urls:
1402             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1403
1404         # Look for embedded rtl.nl player
1405         matches = re.findall(
1406             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1407             webpage)
1408         if matches:
1409             return _playlist_from_matches(matches, ie='RtlNl')
1410
1411         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1412         if vimeo_url is not None:
1413             return self.url_result(vimeo_url)
1414
1415         vid_me_embed_url = self._search_regex(
1416             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1417             webpage, 'vid.me embed', default=None)
1418         if vid_me_embed_url is not None:
1419             return self.url_result(vid_me_embed_url, 'Vidme')
1420
1421         # Look for embedded YouTube player
1422         matches = re.findall(r'''(?x)
1423             (?:
1424                 <iframe[^>]+?src=|
1425                 data-video-url=|
1426                 <embed[^>]+?src=|
1427                 embedSWF\(?:\s*|
1428                 new\s+SWFObject\(
1429             )
1430             (["\'])
1431                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1432                 (?:embed|v|p)/.+?)
1433             \1''', webpage)
1434         if matches:
1435             return _playlist_from_matches(
1436                 matches, lambda m: unescapeHTML(m[1]))
1437
1438         # Look for lazyYT YouTube embed
1439         matches = re.findall(
1440             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1441         if matches:
1442             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1443
1444         # Look for embedded Dailymotion player
1445         matches = re.findall(
1446             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1447         if matches:
1448             return _playlist_from_matches(
1449                 matches, lambda m: unescapeHTML(m[1]))
1450
1451         # Look for embedded Dailymotion playlist player (#3822)
1452         m = re.search(
1453             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1454         if m:
1455             playlists = re.findall(
1456                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1457             if playlists:
1458                 return _playlist_from_matches(
1459                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1460
1461         # Look for embedded Wistia player
1462         match = re.search(
1463             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1464         if match:
1465             embed_url = self._proto_relative_url(
1466                 unescapeHTML(match.group('url')))
1467             return {
1468                 '_type': 'url_transparent',
1469                 'url': embed_url,
1470                 'ie_key': 'Wistia',
1471                 'uploader': video_uploader,
1472                 'title': video_title,
1473                 'id': video_id,
1474             }
1475
1476         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1477         if match:
1478             return {
1479                 '_type': 'url_transparent',
1480                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1481                 'ie_key': 'Wistia',
1482                 'uploader': video_uploader,
1483                 'title': video_title,
1484                 'id': match.group('id')
1485             }
1486
1487         # Look for SVT player
1488         svt_url = SVTIE._extract_url(webpage)
1489         if svt_url:
1490             return self.url_result(svt_url, 'SVT')
1491
1492         # Look for embedded condenast player
1493         matches = re.findall(
1494             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1495             webpage)
1496         if matches:
1497             return {
1498                 '_type': 'playlist',
1499                 'entries': [{
1500                     '_type': 'url',
1501                     'ie_key': 'CondeNast',
1502                     'url': ma,
1503                 } for ma in matches],
1504                 'title': video_title,
1505                 'id': video_id,
1506             }
1507
1508         # Look for Bandcamp pages with custom domain
1509         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1510         if mobj is not None:
1511             burl = unescapeHTML(mobj.group(1))
1512             # Don't set the extractor because it can be a track url or an album
1513             return self.url_result(burl)
1514
1515         # Look for embedded Vevo player
1516         mobj = re.search(
1517             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1518         if mobj is not None:
1519             return self.url_result(mobj.group('url'))
1520
1521         # Look for embedded Viddler player
1522         mobj = re.search(
1523             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1524             webpage)
1525         if mobj is not None:
1526             return self.url_result(mobj.group('url'))
1527
1528         # Look for NYTimes player
1529         mobj = re.search(
1530             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1531             webpage)
1532         if mobj is not None:
1533             return self.url_result(mobj.group('url'))
1534
1535         # Look for Libsyn player
1536         mobj = re.search(
1537             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1538         if mobj is not None:
1539             return self.url_result(mobj.group('url'))
1540
1541         # Look for Ooyala videos
1542         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1543                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1544                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1545                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1546         if mobj is not None:
1547             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1548
1549         # Look for multiple Ooyala embeds on SBN network websites
1550         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1551         if mobj is not None:
1552             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1553             if embeds:
1554                 return _playlist_from_matches(
1555                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1556
1557         # Look for Aparat videos
1558         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1559         if mobj is not None:
1560             return self.url_result(mobj.group(1), 'Aparat')
1561
1562         # Look for MPORA videos
1563         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1564         if mobj is not None:
1565             return self.url_result(mobj.group(1), 'Mpora')
1566
1567         # Look for embedded NovaMov-based player
1568         mobj = re.search(
1569             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1570                     (?P<url>http://(?:(?:embed|www)\.)?
1571                         (?:novamov\.com|
1572                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1573                            videoweed\.(?:es|com)|
1574                            movshare\.(?:net|sx|ag)|
1575                            divxstage\.(?:eu|net|ch|co|at|ag))
1576                         /embed\.php.+?)\1''', webpage)
1577         if mobj is not None:
1578             return self.url_result(mobj.group('url'))
1579
1580         # Look for embedded Facebook player
1581         mobj = re.search(
1582             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1583         if mobj is not None:
1584             return self.url_result(mobj.group('url'), 'Facebook')
1585
1586         # Look for embedded VK player
1587         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1588         if mobj is not None:
1589             return self.url_result(mobj.group('url'), 'VK')
1590
1591         # Look for embedded Odnoklassniki player
1592         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1593         if mobj is not None:
1594             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1595
1596         # Look for embedded ivi player
1597         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1598         if mobj is not None:
1599             return self.url_result(mobj.group('url'), 'Ivi')
1600
1601         # Look for embedded Huffington Post player
1602         mobj = re.search(
1603             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1604         if mobj is not None:
1605             return self.url_result(mobj.group('url'), 'HuffPost')
1606
1607         # Look for embed.ly
1608         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1609         if mobj is not None:
1610             return self.url_result(mobj.group('url'))
1611         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1612         if mobj is not None:
1613             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1614
1615         # Look for funnyordie embed
1616         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1617         if matches:
1618             return _playlist_from_matches(
1619                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1620
1621         # Look for BBC iPlayer embed
1622         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1623         if matches:
1624             return _playlist_from_matches(matches, ie='BBCCoUk')
1625
1626         # Look for embedded RUTV player
1627         rutv_url = RUTVIE._extract_url(webpage)
1628         if rutv_url:
1629             return self.url_result(rutv_url, 'RUTV')
1630
1631         # Look for embedded TVC player
1632         tvc_url = TVCIE._extract_url(webpage)
1633         if tvc_url:
1634             return self.url_result(tvc_url, 'TVC')
1635
1636         # Look for embedded SportBox player
1637         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1638         if sportbox_urls:
1639             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1640
1641         # Look for embedded PornHub player
1642         pornhub_url = PornHubIE._extract_url(webpage)
1643         if pornhub_url:
1644             return self.url_result(pornhub_url, 'PornHub')
1645
1646         # Look for embedded XHamster player
1647         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1648         if xhamster_urls:
1649             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1650
1651         # Look for embedded TNAFlixNetwork player
1652         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1653         if tnaflix_urls:
1654             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1655
1656         # Look for embedded Tvigle player
1657         mobj = re.search(
1658             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1659         if mobj is not None:
1660             return self.url_result(mobj.group('url'), 'Tvigle')
1661
1662         # Look for embedded TED player
1663         mobj = re.search(
1664             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1665         if mobj is not None:
1666             return self.url_result(mobj.group('url'), 'TED')
1667
1668         # Look for embedded Ustream videos
1669         mobj = re.search(
1670             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1671         if mobj is not None:
1672             return self.url_result(mobj.group('url'), 'Ustream')
1673
1674         # Look for embedded arte.tv player
1675         mobj = re.search(
1676             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1677             webpage)
1678         if mobj is not None:
1679             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1680
1681         # Look for embedded francetv player
1682         mobj = re.search(
1683             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1684             webpage)
1685         if mobj is not None:
1686             return self.url_result(mobj.group('url'))
1687
1688         # Look for embedded smotri.com player
1689         smotri_url = SmotriIE._extract_url(webpage)
1690         if smotri_url:
1691             return self.url_result(smotri_url, 'Smotri')
1692
1693         # Look for embedded Myvi.ru player
1694         myvi_url = MyviIE._extract_url(webpage)
1695         if myvi_url:
1696             return self.url_result(myvi_url)
1697
1698         # Look for embedded soundcloud player
1699         mobj = re.search(
1700             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1701             webpage)
1702         if mobj is not None:
1703             url = unescapeHTML(mobj.group('url'))
1704             return self.url_result(url)
1705
1706         # Look for embedded vulture.com player
1707         mobj = re.search(
1708             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1709             webpage)
1710         if mobj is not None:
1711             url = unescapeHTML(mobj.group('url'))
1712             return self.url_result(url, ie='Vulture')
1713
1714         # Look for embedded mtvservices player
1715         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1716         if mtvservices_url:
1717             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1718
1719         # Look for embedded yahoo player
1720         mobj = re.search(
1721             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1722             webpage)
1723         if mobj is not None:
1724             return self.url_result(mobj.group('url'), 'Yahoo')
1725
1726         # Look for embedded sbs.com.au player
1727         mobj = re.search(
1728             r'''(?x)
1729             (?:
1730                 <meta\s+property="og:video"\s+content=|
1731                 <iframe[^>]+?src=
1732             )
1733             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1734             webpage)
1735         if mobj is not None:
1736             return self.url_result(mobj.group('url'), 'SBS')
1737
1738         # Look for embedded Cinchcast player
1739         mobj = re.search(
1740             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1741             webpage)
1742         if mobj is not None:
1743             return self.url_result(mobj.group('url'), 'Cinchcast')
1744
1745         mobj = re.search(
1746             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1747             webpage)
1748         if not mobj:
1749             mobj = re.search(
1750                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1751                 webpage)
1752         if mobj is not None:
1753             return self.url_result(mobj.group('url'), 'MLB')
1754
1755         mobj = re.search(
1756             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1757             webpage)
1758         if mobj is not None:
1759             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1760
1761         mobj = re.search(
1762             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1763             webpage)
1764         if mobj is not None:
1765             return self.url_result(mobj.group('url'), 'Livestream')
1766
1767         # Look for Zapiks embed
1768         mobj = re.search(
1769             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1770         if mobj is not None:
1771             return self.url_result(mobj.group('url'), 'Zapiks')
1772
1773         # Look for Kaltura embeds
1774         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1775                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1776         if mobj is not None:
1777             return self.url_result(smuggle_url(
1778                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1779                 {'source_url': url}), 'Kaltura')
1780
1781         # Look for Eagle.Platform embeds
1782         mobj = re.search(
1783             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1784         if mobj is not None:
1785             return self.url_result(mobj.group('url'), 'EaglePlatform')
1786
1787         # Look for ClipYou (uses Eagle.Platform) embeds
1788         mobj = re.search(
1789             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1790         if mobj is not None:
1791             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1792
1793         # Look for Pladform embeds
1794         pladform_url = PladformIE._extract_url(webpage)
1795         if pladform_url:
1796             return self.url_result(pladform_url)
1797
1798         # Look for Videomore embeds
1799         videomore_url = VideomoreIE._extract_url(webpage)
1800         if videomore_url:
1801             return self.url_result(videomore_url)
1802
1803         # Look for Playwire embeds
1804         mobj = re.search(
1805             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1806         if mobj is not None:
1807             return self.url_result(mobj.group('url'))
1808
1809         # Look for 5min embeds
1810         mobj = re.search(
1811             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1812         if mobj is not None:
1813             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1814
1815         # Look for Crooks and Liars embeds
1816         mobj = re.search(
1817             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1818         if mobj is not None:
1819             return self.url_result(mobj.group('url'))
1820
1821         # Look for NBC Sports VPlayer embeds
1822         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1823         if nbc_sports_url:
1824             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1825
1826         # Look for Google Drive embeds
1827         google_drive_url = GoogleDriveIE._extract_url(webpage)
1828         if google_drive_url:
1829             return self.url_result(google_drive_url, 'GoogleDrive')
1830
1831         # Look for UDN embeds
1832         mobj = re.search(
1833             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1834         if mobj is not None:
1835             return self.url_result(
1836                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1837
1838         # Look for Senate ISVP iframe
1839         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1840         if senate_isvp_url:
1841             return self.url_result(senate_isvp_url, 'SenateISVP')
1842
1843         # Look for Dailymotion Cloud videos
1844         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1845         if dmcloud_url:
1846             return self.url_result(dmcloud_url, 'DailymotionCloud')
1847
1848         # Look for OnionStudios embeds
1849         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1850         if onionstudios_url:
1851             return self.url_result(onionstudios_url)
1852
1853         # Look for SnagFilms embeds
1854         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1855         if snagfilms_url:
1856             return self.url_result(snagfilms_url)
1857
1858         # Look for JWPlatform embeds
1859         jwplatform_url = JWPlatformIE._extract_url(webpage)
1860         if jwplatform_url:
1861             return self.url_result(jwplatform_url, 'JWPlatform')
1862
1863         # Look for ScreenwaveMedia embeds
1864         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1865         if mobj is not None:
1866             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1867
1868         # Look for Digiteka embeds
1869         digiteka_url = DigitekaIE._extract_url(webpage)
1870         if digiteka_url:
1871             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1872
1873         # Look for Limelight embeds
1874         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1875         if mobj:
1876             lm = {
1877                 'Media': 'media',
1878                 'Channel': 'channel',
1879                 'ChannelList': 'channel_list',
1880             }
1881             return self.url_result('limelight:%s:%s' % (
1882                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1883
1884         # Look for AdobeTVVideo embeds
1885         mobj = re.search(
1886             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1887             webpage)
1888         if mobj is not None:
1889             return self.url_result(
1890                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1891                 'AdobeTVVideo')
1892
1893         def check_video(vurl):
1894             if YoutubeIE.suitable(vurl):
1895                 return True
1896             vpath = compat_urlparse.urlparse(vurl).path
1897             vext = determine_ext(vpath)
1898             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1899
1900         def filter_video(urls):
1901             return list(filter(check_video, urls))
1902
1903         # Start with something easy: JW Player in SWFObject
1904         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1905         if not found:
1906             # Look for gorilla-vid style embedding
1907             found = filter_video(re.findall(r'''(?sx)
1908                 (?:
1909                     jw_plugins|
1910                     JWPlayerOptions|
1911                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1912                 )
1913                 .*?
1914                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1915         if not found:
1916             # Broaden the search a little bit
1917             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1918         if not found:
            # Broaden the search a little bit: JWPlayer JS loader
1920             found = filter_video(re.findall(
1921                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1922         if not found:
1923             # Flow player
1924             found = filter_video(re.findall(r'''(?xs)
1925                 flowplayer\("[^"]+",\s*
1926                     \{[^}]+?\}\s*,
1927                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1928                         ["']?url["']?\s*:\s*["']([^"']+)["']
1929             ''', webpage))
1930         if not found:
1931             # Cinerama player
1932             found = re.findall(
1933                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1934         if not found:
1935             # Try to find twitter cards info
1936             found = filter_video(re.findall(
1937                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1938         if not found:
1939             # We look for Open Graph info:
            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1941             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1942             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1943             if m_video_type is not None:
1944                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1945         if not found:
1946             # HTML5 video
1947             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1948         if not found:
1949             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1950             found = re.search(
1951                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1952                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1953                 webpage)
1954             if not found:
1955                 # Look also in Refresh HTTP header
1956                 refresh_header = head_response.headers.get('Refresh')
1957                 if refresh_header:
1958                     # In python 2 response HTTP headers are bytestrings
1959                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1960                         refresh_header = refresh_header.decode('iso-8859-1')
1961                     found = re.search(REDIRECT_REGEX, refresh_header)
1962             if found:
1963                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1964                 self.report_following_redirect(new_url)
1965                 return {
1966                     '_type': 'url',
1967                     'url': new_url,
1968                 }
1969         if not found:
1970             raise UnsupportedError(url)
1971
1972         entries = []
1973         for video_url in found:
1974             video_url = video_url.replace('\\/', '/')
1975             video_url = compat_urlparse.urljoin(url, video_url)
1976             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1977
1978             # Sometimes, jwplayer extraction will result in a YouTube URL
1979             if YoutubeIE.suitable(video_url):
1980                 entries.append(self.url_result(video_url, 'Youtube'))
1981                 continue
1982
1983             # here's a fun little line of code for you:
1984             video_id = os.path.splitext(video_id)[0]
1985
1986             entry_info_dict = {
1987                 'id': video_id,
1988                 'uploader': video_uploader,
1989                 'title': video_title,
1990                 'age_limit': age_limit,
1991             }
1992
1993             ext = determine_ext(video_url)
1994             if ext == 'smil':
1995                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
1996             elif ext == 'xspf':
1997                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1998             elif ext == 'm3u8':
1999                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2000             elif ext == 'mpd':
2001                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2002             elif ext == 'f4m':
2003                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2004             else:
2005                 entry_info_dict['url'] = video_url
2006
2007             entries.append(entry_info_dict)
2008
2009         if len(entries) == 1:
2010             return entries[0]
2011         else:
2012             for num, e in enumerate(entries, start=1):
2013                 # 'url' results don't have a title
2014                 if e.get('title') is not None:
2015                     e['title'] = '%s (%d)' % (e['title'], num)
2016             return {
2017                 '_type': 'playlist',
2018                 'entries': entries,
2019             }