youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .tvc import TVCIE
  38 from .sportbox import SportBoxEmbedIE
  39 from .smotri import SmotriIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45
  46
  47 class GenericIE(InfoExtractor):
  48     IE_DESC = 'Generic downloader that works on some sites'
  49     _VALID_URL = r'.*'
  50     IE_NAME = 'generic'
  51     _TESTS = [
  52         # Direct link to a video
  53         {
  54             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  55             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  56             'info_dict': {
  57                 'id': 'trailer',
  58                 'ext': 'mp4',
  59                 'title': 'trailer',
  60                 'upload_date': '20100513',
  61             }
  62         },
  63         # Direct link to media delivered compressed (until Accept-Encoding is *)
  64         {
  65             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  66             'md5': '128c42e68b13950268b648275386fc74',
  67             'info_dict': {
  68                 'id': 'FictionJunction-Parallel_Hearts',
  69                 'ext': 'flac',
  70                 'title': 'FictionJunction-Parallel_Hearts',
  71                 'upload_date': '20140522',
  72             },
  73             'expected_warnings': [
  74                 'URL could be a direct video link, returning it as such.'
  75             ]
  76         },
  77         # Direct download with broken HEAD
  78         {
  79             'url': 'http://ai-radio.org:8000/radio.opus',
  80             'info_dict': {
  81                 'id': 'radio',
  82                 'ext': 'opus',
  83                 'title': 'radio',
  84             },
  85             'params': {
  86                 'skip_download': True,  # infinite live stream
  87             },
  88             'expected_warnings': [
  89                 r'501.*Not Implemented'
  90             ],
  91         },
  92         # Direct link with incorrect MIME type
  93         {
  94             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  95             'md5': '4ccbebe5f36706d85221f204d7eb5913',
  96             'info_dict': {
  97                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  98                 'id': '5_Lennart_Poettering_-_Systemd',
  99                 'ext': 'webm',
 100                 'title': '5_Lennart_Poettering_-_Systemd',
 101                 'upload_date': '20141120',
 102             },
 103             'expected_warnings': [
 104                 'URL could be a direct video link, returning it as such.'
 105             ]
 106         },
 107         # RSS feed
 108         {
 109             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 110             'info_dict': {
 111                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 112                 'title': 'Zero Punctuation',
 113                 'description': 're:.*groundbreaking video review series.*'
 114             },
 115             'playlist_mincount': 11,
 116         },
 117         # RSS feed with enclosure
 118         {
 119             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 120             'info_dict': {
 121                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 122                 'ext': 'm4v',
 123                 'upload_date': '20150228',
 124                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 125             }
 126         },
 127         # google redirect
 128         {
 129             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 130             'info_dict': {
 131                 'id': 'cmQHVoWB5FY',
 132                 'ext': 'mp4',
 133                 'upload_date': '20130224',
 134                 'uploader_id': 'TheVerge',
 135                 'description': 're:^Chris Ziegler takes a look at the\.*',
 136                 'uploader': 'The Verge',
 137                 'title': 'First Firefox OS phones side-by-side',
 138             },
 139             'params': {
 140                 'skip_download': False,
 141             }
 142         },
 143         {
 144             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 145             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 146             'info_dict': {
 147                 'id': '13601338388002',
 148                 'ext': 'mp4',
 149                 'uploader': 'www.hodiho.fr',
 150                 'title': 'R\u00e9gis plante sa Jeep',
 151             }
 152         },
 153         # bandcamp page with custom domain
 154         {
 155             'add_ie': ['Bandcamp'],
 156             'url': 'http://bronyrock.com/track/the-pony-mash',
 157             'info_dict': {
 158                 'id': '3235767654',
 159                 'ext': 'mp3',
 160                 'title': 'The Pony Mash',
 161                 'uploader': 'M_Pallante',
 162             },
 163             'skip': 'There is a limit of 200 free downloads / month for the test song',
 164         },
 165         # embedded brightcove video
 166         # it also tests brightcove videos that need to set the 'Referer' in the
 167         # http requests
 168         {
 169             'add_ie': ['Brightcove'],
 170             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 171             'info_dict': {
 172                 'id': '2765128793001',
 173                 'ext': 'mp4',
 174                 'title': 'Le cours de bourse : l’analyse technique',
 175                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 176                 'uploader': 'BFM BUSINESS',
 177             },
 178             'params': {
 179                 'skip_download': True,
 180             },
 181         },
 182         {
 183             # https://github.com/rg3/youtube-dl/issues/2253
 184             'url': 'http://bcove.me/i6nfkrc3',
 185             'md5': '0ba9446db037002366bab3b3eb30c88c',
 186             'info_dict': {
 187                 'id': '3101154703001',
 188                 'ext': 'mp4',
 189                 'title': 'Still no power',
 190                 'uploader': 'thestar.com',
 191                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 192             },
 193             'add_ie': ['Brightcove'],
 194         },
 195         {
 196             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 197             'md5': 'fb973ecf6e4a78a67453647444222983',
 198             'info_dict': {
 199                 'id': '3414141473001',
 200                 'ext': 'mp4',
 201                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 202                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 203                 'uploader': 'Championat',
 204             },
 205         },
 206         {
 207             # https://github.com/rg3/youtube-dl/issues/3541
 208             'add_ie': ['Brightcove'],
 209             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 210             'info_dict': {
 211                 'id': '3866516442001',
 212                 'ext': 'mp4',
 213                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 214                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 215                 'uploader': 'SBS Broadcasting',
 216             },
 217             'skip': 'Restricted to Netherlands',
 218             'params': {
 219                 'skip_download': True,  # m3u8 download
 220             },
 221         },
 222         # ooyala video
 223         {
 224             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 225             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 226             'info_dict': {
 227                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 228                 'ext': 'mp4',
 229                 'title': '2cc213299525360.mov',  # that's what we get
 230             },
 231             'add_ie': ['Ooyala'],
 232         },
 233         # multiple ooyala embeds on SBN network websites
 234         {
 235             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 236             'info_dict': {
 237                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 238                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 239             },
 240             'playlist_mincount': 3,
 241             'params': {
 242                 'skip_download': True,
 243             },
 244             'add_ie': ['Ooyala'],
 245         },
 246         # embed.ly video
 247         {
 248             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 249             'info_dict': {
 250                 'id': '9ODmcdjQcHQ',
 251                 'ext': 'mp4',
 252                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 253                 'upload_date': '20140225',
 254                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 255                 'uploader': 'Tested',
 256                 'uploader_id': 'testedcom',
 257             },
 258             # No need to test YoutubeIE here
 259             'params': {
 260                 'skip_download': True,
 261             },
 262         },
 263         # funnyordie embed
 264         {
 265             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 266             'info_dict': {
 267                 'id': '18e820ec3f',
 268                 'ext': 'mp4',
 269                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 270                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 271             },
 272         },
 273         # BBC iPlayer embeds
 274         {
 275             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 276             'info_dict': {
 277                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 278             },
 279             'playlist_mincount': 18,
 280         },
 281         # RUTV embed
 282         {
 283             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 284             'info_dict': {
 285                 'id': '776940',
 286                 'ext': 'mp4',
 287                 'title': 'Охотское море стало целиком российским',
 288                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 289             },
 290             'params': {
 291                 # m3u8 download
 292                 'skip_download': True,
 293             },
 294         },
 295         # TVC embed
 296         {
 297             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 298             'info_dict': {
 299                 'id': '55304',
 300                 'ext': 'mp4',
 301                 'title': 'Дошкольное воспитание',
 302             },
 303         },
 304         # SportBox embed
 305         {
 306             'url': 'http://www.vestifinance.ru/articles/25753',
 307             'info_dict': {
 308                 'id': '25753',
 309                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 310             },
 311             'playlist': [{
 312                 'info_dict': {
 313                     'id': '370908',
 314                     'title': 'Госзаказ. День 3',
 315                     'ext': 'mp4',
 316                 }
 317             }, {
 318                 'info_dict': {
 319                     'id': '370905',
 320                     'title': 'Госзаказ. День 2',
 321                     'ext': 'mp4',
 322                 }
 323             }, {
 324                 'info_dict': {
 325                     'id': '370902',
 326                     'title': 'Госзаказ. День 1',
 327                     'ext': 'mp4',
 328                 }
 329             }],
 330             'params': {
 331                 # m3u8 download
 332                 'skip_download': True,
 333             },
 334         },
 335         # Embedded TED video
 336         {
 337             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 338             'md5': '65fdff94098e4a607385a60c5177c638',
 339             'info_dict': {
 340                 'id': '1969',
 341                 'ext': 'mp4',
 342                 'title': 'Hidden miracles of the natural world',
 343                 'uploader': 'Louie Schwartzberg',
 344                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 345             }
 346         },
 347         # Embedded Ustream video
 348         {
 349             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 350             'md5': '27b99cdb639c9b12a79bca876a073417',
 351             'info_dict': {
 352                 'id': '45734260',
 353                 'ext': 'flv',
 354                 'uploader': 'AU SPA:  The NSA and Privacy',
 355                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 356             }
 357         },
 358         # nowvideo embed hidden behind percent encoding
 359         {
 360             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 361             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 362             'info_dict': {
 363                 'id': '06e53103ca9aa',
 364                 'ext': 'flv',
 365                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 366                 'description': 'No description',
 367             },
 368         },
 369         # arte embed
 370         {
 371             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 372             'md5': '7653032cbb25bf6c80d80f217055fa43',
 373             'info_dict': {
 374                 'id': '048195-004_PLUS7-F',
 375                 'ext': 'flv',
 376                 'title': 'X:enius',
 377                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 378                 'upload_date': '20140320',
 379             },
 380             'params': {
 381                 'skip_download': 'Requires rtmpdump'
 382             }
 383         },
 384         # Condé Nast embed
 385         {
 386             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 387             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 388             'info_dict': {
 389                 'id': '53501be369702d3275860000',
 390                 'ext': 'mp4',
 391                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 392             }
 393         },
 394         # Dailymotion embed
 395         {
 396             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 397             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 398             'info_dict': {
 399                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 400                 'ext': 'mp4',
 401                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 402                 'uploader': 'Spi0n',
 403             },
 404             'add_ie': ['Dailymotion'],
 405         },
 406         # YouTube embed
 407         {
 408             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 409             'info_dict': {
 410                 'id': 'FXRb4ykk4S0',
 411                 'ext': 'mp4',
 412                 'title': 'The NBL Auction 2014',
 413                 'uploader': 'BADMINTON England',
 414                 'uploader_id': 'BADMINTONEvents',
 415                 'upload_date': '20140603',
 416                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 417             },
 418             'add_ie': ['Youtube'],
 419             'params': {
 420                 'skip_download': True,
 421             }
 422         },
 423         # MTVServices embed
 424         {
 425             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 426             'md5': '35727f82f58c76d996fc188f9755b0d5',
 427             'info_dict': {
 428                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 429                 'ext': 'mp4',
 430                 'title': 'Review',
 431                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 432             },
 433         },
 434         # YouTube embed via <data-embed-url="">
 435         {
 436             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 437             'info_dict': {
 438                 'id': '4vAffPZIT44',
 439                 'ext': 'mp4',
 440                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 441                 'uploader': 'Gameloft',
 442                 'uploader_id': 'gameloft',
 443                 'upload_date': '20140828',
 444                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 445             },
 446             'params': {
 447                 'skip_download': True,
 448             }
 449         },
 450         # Camtasia studio
 451         {
 452             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 453             'playlist': [{
 454                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 455                 'info_dict': {
 456                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 457                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 458                     'ext': 'flv',
 459                     'duration': 2235.90,
 460                 }
 461             }, {
 462                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 463                 'info_dict': {
 464                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 465                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 466                     'ext': 'flv',
 467                     'duration': 2235.93,
 468                 }
 469             }],
 470             'info_dict': {
 471                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 472             }
 473         },
 474         # Flowplayer
 475         {
 476             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 477             'md5': '9d65602bf31c6e20014319c7d07fba27',
 478             'info_dict': {
 479                 'id': '5123ea6d5e5a7',
 480                 'ext': 'mp4',
 481                 'age_limit': 18,
 482                 'uploader': 'www.handjobhub.com',
 483                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 484             }
 485         },
 486         # Multiple brightcove videos
 487         # https://github.com/rg3/youtube-dl/issues/2283
 488         {
 489             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 490             'info_dict': {
 491                 'id': 'always-never',
 492                 'title': 'Always / Never - The New Yorker',
 493             },
 494             'playlist_count': 3,
 495             'params': {
 496                 'extract_flat': False,
 497                 'skip_download': True,
 498             }
 499         },
 500         # MLB embed
 501         {
 502             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 503             'md5': '96f09a37e44da40dd083e12d9a683327',
 504             'info_dict': {
 505                 'id': '33322633',
 506                 'ext': 'mp4',
 507                 'title': 'Ump changes call to ball',
 508                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 509                 'duration': 48,
 510                 'timestamp': 1401537900,
 511                 'upload_date': '20140531',
 512                 'thumbnail': 're:^https?://.*\.jpg$',
 513             },
 514         },
 515         # Wistia embed
 516         {
 517             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 518             'md5': '8788b683c777a5cf25621eaf286d0c23',
 519             'info_dict': {
 520                 'id': '1cfaf6b7ea',
 521                 'ext': 'mov',
 522                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 523                 'duration': 643.0,
 524                 'filesize': 182808282,
 525                 'uploader': 'education-portal.com',
 526             },
 527         },
 528         {
 529             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 530             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 531             'info_dict': {
 532                 'id': 'uxjb0lwrcz',
 533                 'ext': 'mp4',
 534                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 535                 'duration': 1715.0,
 536                 'uploader': 'thoughtworks.wistia.com',
 537             },
 538         },
 539         # Soundcloud embed
 540         {
 541             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 542             'info_dict': {
 543                 'id': '174391317',
 544                 'ext': 'mp3',
 545                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 546                 'uploader': 'Sophos Security',
 547                 'title': 'Chet Chat 171 - Oct 29, 2014',
 548                 'upload_date': '20141029',
 549             }
 550         },
 551         # Livestream embed
 552         {
 553             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 554             'info_dict': {
 555                 'id': '67864563',
 556                 'ext': 'flv',
 557                 'upload_date': '20141112',
 558                 'title': 'Rosetta #CometLanding webcast HL 10',
 559             }
 560         },
 561         # LazyYT
 562         {
 563             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 564             'info_dict': {
 565                 'id': '1986',
 566                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 567             },
 568             'playlist_mincount': 2,
 569         },
 570         # Cinchcast embed
 571         {
 572             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 573             'info_dict': {
 574                 'id': '7141703',
 575                 'ext': 'mp3',
 576                 'upload_date': '20141126',
 577                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 578             }
 579         },
 580         # Cinerama player
 581         {
 582             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 583             'info_dict': {
 584                 'id': '730m_DandD_1901_512k',
 585                 'ext': 'mp4',
 586                 'uploader': 'www.abc.net.au',
 587                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 588             }
 589         },
 590         # embedded viddler video
 591         {
 592             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 593             'info_dict': {
 594                 'id': '4d03aad9',
 595                 'ext': 'mp4',
 596                 'uploader': 'deadspin',
 597                 'title': 'WALL-TO-GORTAT',
 598                 'timestamp': 1422285291,
 599                 'upload_date': '20150126',
 600             },
 601             'add_ie': ['Viddler'],
 602         },
 603         # Libsyn embed
 604         {
 605             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 606             'info_dict': {
 607                 'id': '3377616',
 608                 'ext': 'mp3',
 609                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 610                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 611                 'upload_date': '20150220',
 612             },
 613         },
 614         # jwplayer YouTube
 615         {
 616             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 617             'info_dict': {
 618                 'id': 'Mrj4DVp2zeA',
 619                 'ext': 'mp4',
 620                 'upload_date': '20150212',
 621                 'uploader': 'The National Archives UK',
 622                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 623                 'uploader_id': 'NationalArchives08',
 624                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 625             },
 626         },
 627         # rtl.nl embed
 628         {
 629             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 630             'playlist_mincount': 5,
 631             'info_dict': {
 632                 'id': 'aanslagen-kopenhagen',
 633                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 634             }
 635         },
 636         # Zapiks embed
 637         {
 638             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 639             'info_dict': {
 640                 'id': '118046',
 641                 'ext': 'mp4',
 642                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 643             }
 644         },
 645         # Kaltura embed
 646         {
 647             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 648             'info_dict': {
 649                 'id': '1_eergr3h1',
 650                 'ext': 'mp4',
 651                 'upload_date': '20150226',
 652                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 653                 'timestamp': int,
 654                 'title': 'John Carlson Postgame 2/25/15',
 655             },
 656         },
 657         # Eagle.Platform embed (generic URL)
 658         {
 659             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 660             'info_dict': {
 661                 'id': '227304',
 662                 'ext': 'mp4',
 663                 'title': 'Навальный вышел на свободу',
 664                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 665                 'thumbnail': 're:^https?://.*\.jpg$',
 666                 'duration': 87,
 667                 'view_count': int,
 668                 'age_limit': 0,
 669             },
 670         },
 671         # ClipYou (Eagle.Platform) embed (custom URL)
 672         {
 673             'url': 'http://muz-tv.ru/play/7129/',
 674             'info_dict': {
 675                 'id': '12820',
 676                 'ext': 'mp4',
 677                 'title': "'O Sole Mio",
 678                 'thumbnail': 're:^https?://.*\.jpg$',
 679                 'duration': 216,
 680                 'view_count': int,
 681             },
 682         },
 683         # Pladform embed
 684         {
 685             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 686             'info_dict': {
 687                 'id': '100183293',
 688                 'ext': 'mp4',
 689                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 690                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 691                 'thumbnail': 're:^https?://.*\.jpg$',
 692                 'duration': 694,
 693                 'age_limit': 0,
 694             },
 695         },
 696         # Playwire embed
 697         {
 698             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 699             'info_dict': {
 700                 'id': '3519514',
 701                 'ext': 'mp4',
 702                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 703                 'thumbnail': 're:^https?://.*\.png$',
 704                 'duration': 45.115,
 705             },
 706         },
 707         # 5min embed
 708         {
 709             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 710             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 711             'info_dict': {
 712                 'id': '518726732',
 713                 'ext': 'mp4',
 714                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 715             },
 716         },
 717         # SVT embed
 718         {
 719             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 720             'info_dict': {
 721                 'id': '2900353',
 722                 'ext': 'flv',
 723                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 724                 'duration': 27,
 725                 'age_limit': 0,
 726             },
 727         },
 728         # Crooks and Liars embed
 729         {
 730             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 731             'info_dict': {
 732                 'id': '8RUoRhRi',
 733                 'ext': 'mp4',
 734                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 735                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 736                 'timestamp': 1428207000,
 737                 'upload_date': '20150405',
 738                 'uploader': 'Heather',
 739             },
 740         },
 741         # Crooks and Liars external embed
 742         {
 743             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 744             'info_dict': {
 745                 'id': 'MTE3MjUtMzQ2MzA',
 746                 'ext': 'mp4',
 747                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 748                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 749                 'timestamp': 1265032391,
 750                 'upload_date': '20100201',
 751                 'uploader': 'Heather',
 752             },
 753         },
 754         # NBC Sports vplayer embed
 755         {
 756             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 757             'info_dict': {
 758                 'id': 'ln7x1qSThw4k',
 759                 'ext': 'flv',
 760                 'title': "PFT Live: New leader in the 'new-look' defense",
 761                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 762             },
 763         },
 764         # UDN embed
 765         {
 766             'url': 'http://www.udn.com/news/story/7314/822787',
 767             'md5': 'fd2060e988c326991037b9aff9df21a6',
 768             'info_dict': {
 769                 'id': '300346',
 770                 'ext': 'mp4',
 771                 'title': '中一中男師變性 全校師生力挺',
 772                 'thumbnail': 're:^https?://.*\.jpg$',
 773             }
 774         },
 775         # Ooyala embed
 776         {
 777             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 778             'info_dict': {
 779                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 780                 'ext': 'mp4',
 781                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 782                 'title': 'This is what separates the Excel masters from the wannabes',
 783             },
 784             'params': {
 785                 # m3u8 downloads
 786                 'skip_download': True,
 787             }
 788         },
 789         # Contains a SMIL manifest
 790         {
 791             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 792             'info_dict': {
 793                 'id': 'file',
 794                 'ext': 'flv',
 795                 'title': '+ Football: Lottery Champions League Europe',
 796                 'uploader': 'www.telewebion.com',
 797             },
 798             'params': {
 799                 # rtmpe downloads
 800                 'skip_download': True,
 801             }
 802         },
 803         # Brightcove URL in single quotes
 804         {
 805             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 806             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 807             'info_dict': {
 808                 'id': '4255764656001',
 809                 'ext': 'mp4',
 810                 'title': 'SN Presents: Russell Martin, World Citizen',
 811                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 812                 'uploader': 'Rogers Sportsnet',
 813             },
 814         }
 815     ]
 816
 817     def report_following_redirect(self, new_url):
 818         """Report information extraction."""
 819         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 820
 821     def _extract_rss(self, url, video_id, doc):
 822         playlist_title = doc.find('./channel/title').text
 823         playlist_desc_el = doc.find('./channel/description')
 824         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 825
 826         entries = []
 827         for it in doc.findall('./channel/item'):
 828             next_url = xpath_text(it, 'link', fatal=False)
 829             if not next_url:
 830                 enclosure_nodes = it.findall('./enclosure')
 831                 for e in enclosure_nodes:
 832                     next_url = e.attrib.get('url')
 833                     if next_url:
 834                         break
 835
 836             if not next_url:
 837                 continue
 838
 839             entries.append({
 840                 '_type': 'url',
 841                 'url': next_url,
 842                 'title': it.find('title').text,
 843             })
 844
 845         return {
 846             '_type': 'playlist',
 847             'id': url,
 848             'title': playlist_title,
 849             'description': playlist_desc,
 850             'entries': entries,
 851         }
 852
 853     def _extract_camtasia(self, url, video_id, webpage):
 854         """ Returns None if no camtasia video can be found. """
 855
 856         camtasia_cfg = self._search_regex(
 857             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 858             webpage, 'camtasia configuration file', default=None)
 859         if camtasia_cfg is None:
 860             return None
 861
 862         title = self._html_search_meta('DC.title', webpage, fatal=True)
 863
 864         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 865         camtasia_cfg = self._download_xml(
 866             camtasia_url, video_id,
 867             note='Downloading camtasia configuration',
 868             errnote='Failed to download camtasia configuration')
 869         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 870
 871         entries = []
 872         for n in fileset_node.getchildren():
 873             url_n = n.find('./uri')
 874             if url_n is None:
 875                 continue
 876
 877             entries.append({
 878                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 879                 'title': '%s - %s' % (title, n.tag),
 880                 'url': compat_urlparse.urljoin(url, url_n.text),
 881                 'duration': float_or_none(n.find('./duration').text),
 882             })
 883
 884         return {
 885             '_type': 'playlist',
 886             'entries': entries,
 887             'title': title,
 888         }
 889
 890     def _real_extract(self, url):
 891         if url.startswith('//'):
 892             return {
 893                 '_type': 'url',
 894                 'url': self.http_scheme() + url,
 895             }
 896
 897         parsed_url = compat_urlparse.urlparse(url)
 898         if not parsed_url.scheme:
 899             default_search = self._downloader.params.get('default_search')
 900             if default_search is None:
 901                 default_search = 'fixup_error'
 902
 903             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 904                 if '/' in url:
 905                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 906                     return self.url_result('http://' + url)
 907                 elif default_search != 'fixup_error':
 908                     if default_search == 'auto_warning':
 909                         if re.match(r'^(?:url|URL)$', url):
 910                             raise ExtractorError(
 911                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 912                                 expected=True)
 913                         else:
 914                             self._downloader.report_warning(
 915                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 916                     return self.url_result('ytsearch:' + url)
 917
 918             if default_search in ('error', 'fixup_error'):
 919                 raise ExtractorError(
 920                     '%r is not a valid URL. '
 921                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 922                     % (url, url), expected=True)
 923             else:
 924                 if ':' not in default_search:
 925                     default_search += ':'
 926                 return self.url_result(default_search + url)
 927
 928         url, smuggled_data = unsmuggle_url(url)
 929         force_videoid = None
 930         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 931         if smuggled_data and 'force_videoid' in smuggled_data:
 932             force_videoid = smuggled_data['force_videoid']
 933             video_id = force_videoid
 934         else:
 935             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
 936
 937         self.to_screen('%s: Requesting header' % video_id)
 938
 939         head_req = HEADRequest(url)
 940         head_response = self._request_webpage(
 941             head_req, video_id,
 942             note=False, errnote='Could not send HEAD request to %s' % url,
 943             fatal=False)
 944
 945         if head_response is not False:
 946             # Check for redirect
 947             new_url = head_response.geturl()
 948             if url != new_url:
 949                 self.report_following_redirect(new_url)
 950                 if force_videoid:
 951                     new_url = smuggle_url(
 952                         new_url, {'force_videoid': force_videoid})
 953                 return self.url_result(new_url)
 954
 955         full_response = None
 956         if head_response is False:
 957             request = compat_urllib_request.Request(url)
 958             request.add_header('Accept-Encoding', '*')
 959             full_response = self._request_webpage(request, video_id)
 960             head_response = full_response
 961
 962         # Check for direct link to a video
 963         content_type = head_response.headers.get('Content-Type', '')
 964         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 965         if m:
 966             upload_date = unified_strdate(
 967                 head_response.headers.get('Last-Modified'))
 968             return {
 969                 'id': video_id,
 970                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 971                 'direct': True,
 972                 'formats': [{
 973                     'format_id': m.group('format_id'),
 974                     'url': url,
 975                     'vcodec': 'none' if m.group('type') == 'audio' else None
 976                 }],
 977                 'upload_date': upload_date,
 978             }
 979
 980         if not self._downloader.params.get('test', False) and not is_intentional:
 981             self._downloader.report_warning('Falling back on generic information extractor.')
 982
 983         if not full_response:
 984             request = compat_urllib_request.Request(url)
 985             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 986             # making it impossible to download only chunk of the file (yet we need only 512kB to
 987             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
 988             # that will always result in downloading the whole file that is not desirable.
 989             # Therefore for extraction pass we have to override Accept-Encoding to any in order
 990             # to accept raw bytes and being able to download only a chunk.
 991             # It may probably better to solve this by checking Content-Type for application/octet-stream
 992             # after HEAD request finishes, but not sure if we can rely on this.
 993             request.add_header('Accept-Encoding', '*')
 994             full_response = self._request_webpage(request, video_id)
 995
 996         # Maybe it's a direct link to a video?
 997         # Be careful not to download the whole thing!
 998         first_bytes = full_response.read(512)
 999         if not is_html(first_bytes):
1000             self._downloader.report_warning(
1001                 'URL could be a direct video link, returning it as such.')
1002             upload_date = unified_strdate(
1003                 head_response.headers.get('Last-Modified'))
1004             return {
1005                 'id': video_id,
1006                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1007                 'direct': True,
1008                 'url': url,
1009                 'upload_date': upload_date,
1010             }
1011
1012         webpage = self._webpage_read_content(
1013             full_response, url, video_id, prefix=first_bytes)
1014
1015         self.report_extraction(video_id)
1016
1017         # Is it an RSS feed?
1018         try:
1019             doc = parse_xml(webpage)
1020             if doc.tag == 'rss':
1021                 return self._extract_rss(url, video_id, doc)
1022         except compat_xml_parse_error:
1023             pass
1024
1025         # Is it a Camtasia project?
1026         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1027         if camtasia_res is not None:
1028             return camtasia_res
1029
1030         # Sometimes embedded video player is hidden behind percent encoding
1031         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1032         # Unescaping the whole page allows to handle those cases in a generic way
1033         webpage = compat_urllib_parse.unquote(webpage)
1034
1035         # it's tempting to parse this further, but you would
1036         # have to take into account all the variations like
1037         #   Video Title - Site Name
1038         #   Site Name | Video Title
1039         #   Video Title - Tagline | Site Name
1040         # and so on and so forth; it's just not practical
1041         video_title = self._html_search_regex(
1042             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1043             default='video')
1044
1045         # Try to detect age limit automatically
1046         age_limit = self._rta_search(webpage)
1047         # And then there are the jokers who advertise that they use RTA,
1048         # but actually don't.
1049         AGE_LIMIT_MARKERS = [
1050             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1051         ]
1052         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1053             age_limit = 18
1054
1055         # video uploader is domain name
1056         video_uploader = self._search_regex(
1057             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1058
1059         # Helper method
1060         def _playlist_from_matches(matches, getter=None, ie=None):
1061             urlrs = orderedSet(
1062                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1063                 for m in matches)
1064             return self.playlist_result(
1065                 urlrs, playlist_id=video_id, playlist_title=video_title)
1066
1067         # Look for BrightCove:
1068         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1069         if bc_urls:
1070             self.to_screen('Brightcove video detected.')
1071             entries = [{
1072                 '_type': 'url',
1073                 'url': smuggle_url(bc_url, {'Referer': url}),
1074                 'ie_key': 'Brightcove'
1075             } for bc_url in bc_urls]
1076
1077             return {
1078                 '_type': 'playlist',
1079                 'title': video_title,
1080                 'id': video_id,
1081                 'entries': entries,
1082             }
1083
1084         # Look for embedded rtl.nl player
1085         matches = re.findall(
1086             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1087             webpage)
1088         if matches:
1089             return _playlist_from_matches(matches, ie='RtlNl')
1090
1091         # Look for embedded (iframe) Vimeo player
1092         mobj = re.search(
1093             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1094         if mobj:
1095             player_url = unescapeHTML(mobj.group('url'))
1096             surl = smuggle_url(player_url, {'Referer': url})
1097             return self.url_result(surl)
1098         # Look for embedded (swf embed) Vimeo player
1099         mobj = re.search(
1100             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1101         if mobj:
1102             return self.url_result(mobj.group(1))
1103
1104         # Look for embedded YouTube player
1105         matches = re.findall(r'''(?x)
1106             (?:
1107                 <iframe[^>]+?src=|
1108                 data-video-url=|
1109                 <embed[^>]+?src=|
1110                 embedSWF\(?:\s*|
1111                 new\s+SWFObject\(
1112             )
1113             (["\'])
1114                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1115                 (?:embed|v|p)/.+?)
1116             \1''', webpage)
1117         if matches:
1118             return _playlist_from_matches(
1119                 matches, lambda m: unescapeHTML(m[1]))
1120
1121         # Look for lazyYT YouTube embed
1122         matches = re.findall(
1123             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1124         if matches:
1125             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1126
1127         # Look for embedded Dailymotion player
1128         matches = re.findall(
1129             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1130         if matches:
1131             return _playlist_from_matches(
1132                 matches, lambda m: unescapeHTML(m[1]))
1133
1134         # Look for embedded Dailymotion playlist player (#3822)
1135         m = re.search(
1136             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1137         if m:
1138             playlists = re.findall(
1139                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1140             if playlists:
1141                 return _playlist_from_matches(
1142                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1143
1144         # Look for embedded Wistia player
1145         match = re.search(
1146             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1147         if match:
1148             embed_url = self._proto_relative_url(
1149                 unescapeHTML(match.group('url')))
1150             return {
1151                 '_type': 'url_transparent',
1152                 'url': embed_url,
1153                 'ie_key': 'Wistia',
1154                 'uploader': video_uploader,
1155                 'title': video_title,
1156                 'id': video_id,
1157             }
1158
1159         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1160         if match:
1161             return {
1162                 '_type': 'url_transparent',
1163                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1164                 'ie_key': 'Wistia',
1165                 'uploader': video_uploader,
1166                 'title': video_title,
1167                 'id': match.group('id')
1168             }
1169
1170         # Look for embedded blip.tv player
1171         bliptv_url = BlipTVIE._extract_url(webpage)
1172         if bliptv_url:
1173             return self.url_result(bliptv_url, 'BlipTV')
1174
1175         # Look for SVT player
1176         svt_url = SVTIE._extract_url(webpage)
1177         if svt_url:
1178             return self.url_result(svt_url, 'SVT')
1179
1180         # Look for embedded condenast player
1181         matches = re.findall(
1182             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1183             webpage)
1184         if matches:
1185             return {
1186                 '_type': 'playlist',
1187                 'entries': [{
1188                     '_type': 'url',
1189                     'ie_key': 'CondeNast',
1190                     'url': ma,
1191                 } for ma in matches],
1192                 'title': video_title,
1193                 'id': video_id,
1194             }
1195
1196         # Look for Bandcamp pages with custom domain
1197         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1198         if mobj is not None:
1199             burl = unescapeHTML(mobj.group(1))
1200             # Don't set the extractor because it can be a track url or an album
1201             return self.url_result(burl)
1202
1203         # Look for embedded Vevo player
1204         mobj = re.search(
1205             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1206         if mobj is not None:
1207             return self.url_result(mobj.group('url'))
1208
1209         # Look for embedded Viddler player
1210         mobj = re.search(
1211             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1212             webpage)
1213         if mobj is not None:
1214             return self.url_result(mobj.group('url'))
1215
1216         # Look for NYTimes player
1217         mobj = re.search(
1218             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1219             webpage)
1220         if mobj is not None:
1221             return self.url_result(mobj.group('url'))
1222
1223         # Look for Libsyn player
1224         mobj = re.search(
1225             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1226         if mobj is not None:
1227             return self.url_result(mobj.group('url'))
1228
1229         # Look for Ooyala videos
1230         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1231                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1232                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1233                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1234         if mobj is not None:
1235             return OoyalaIE._build_url_result(mobj.group('ec'))
1236
1237         # Look for multiple Ooyala embeds on SBN network websites
1238         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1239         if mobj is not None:
1240             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1241             if embeds:
1242                 return _playlist_from_matches(
1243                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1244
1245         # Look for Aparat videos
1246         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1247         if mobj is not None:
1248             return self.url_result(mobj.group(1), 'Aparat')
1249
1250         # Look for MPORA videos
1251         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1252         if mobj is not None:
1253             return self.url_result(mobj.group(1), 'Mpora')
1254
1255         # Look for embedded NovaMov-based player
1256         mobj = re.search(
1257             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1258                     (?P<url>http://(?:(?:embed|www)\.)?
1259                         (?:novamov\.com|
1260                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1261                            videoweed\.(?:es|com)|
1262                            movshare\.(?:net|sx|ag)|
1263                            divxstage\.(?:eu|net|ch|co|at|ag))
1264                         /embed\.php.+?)\1''', webpage)
1265         if mobj is not None:
1266             return self.url_result(mobj.group('url'))
1267
1268         # Look for embedded Facebook player
1269         mobj = re.search(
1270             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1271         if mobj is not None:
1272             return self.url_result(mobj.group('url'), 'Facebook')
1273
1274         # Look for embedded VK player
1275         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1276         if mobj is not None:
1277             return self.url_result(mobj.group('url'), 'VK')
1278
1279         # Look for embedded ivi player
1280         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1281         if mobj is not None:
1282             return self.url_result(mobj.group('url'), 'Ivi')
1283
1284         # Look for embedded Huffington Post player
1285         mobj = re.search(
1286             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1287         if mobj is not None:
1288             return self.url_result(mobj.group('url'), 'HuffPost')
1289
1290         # Look for embed.ly
1291         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1292         if mobj is not None:
1293             return self.url_result(mobj.group('url'))
1294         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1295         if mobj is not None:
1296             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1297
1298         # Look for funnyordie embed
1299         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1300         if matches:
1301             return _playlist_from_matches(
1302                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1303
1304         # Look for BBC iPlayer embed
1305         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1306         if matches:
1307             return _playlist_from_matches(matches, ie='BBCCoUk')
1308
1309         # Look for embedded RUTV player
1310         rutv_url = RUTVIE._extract_url(webpage)
1311         if rutv_url:
1312             return self.url_result(rutv_url, 'RUTV')
1313
1314         # Look for embedded TVC player
1315         tvc_url = TVCIE._extract_url(webpage)
1316         if tvc_url:
1317             return self.url_result(tvc_url, 'TVC')
1318
1319         # Look for embedded SportBox player
1320         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1321         if sportbox_urls:
1322             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1323
1324         # Look for embedded Tvigle player
1325         mobj = re.search(
1326             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'), 'Tvigle')
1329
1330         # Look for embedded TED player
1331         mobj = re.search(
1332             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1333         if mobj is not None:
1334             return self.url_result(mobj.group('url'), 'TED')
1335
1336         # Look for embedded Ustream videos
1337         mobj = re.search(
1338             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1339         if mobj is not None:
1340             return self.url_result(mobj.group('url'), 'Ustream')
1341
1342         # Look for embedded arte.tv player
1343         mobj = re.search(
1344             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1345             webpage)
1346         if mobj is not None:
1347             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1348
1349         # Look for embedded smotri.com player
1350         smotri_url = SmotriIE._extract_url(webpage)
1351         if smotri_url:
1352             return self.url_result(smotri_url, 'Smotri')
1353
1354         # Look for embeded soundcloud player
1355         mobj = re.search(
1356             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1357             webpage)
1358         if mobj is not None:
1359             url = unescapeHTML(mobj.group('url'))
1360             return self.url_result(url)
1361
1362         # Look for embedded vulture.com player
1363         mobj = re.search(
1364             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1365             webpage)
1366         if mobj is not None:
1367             url = unescapeHTML(mobj.group('url'))
1368             return self.url_result(url, ie='Vulture')
1369
1370         # Look for embedded mtvservices player
1371         mobj = re.search(
1372             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1373             webpage)
1374         if mobj is not None:
1375             url = unescapeHTML(mobj.group('url'))
1376             return self.url_result(url, ie='MTVServicesEmbedded')
1377
1378         # Look for embedded yahoo player
1379         mobj = re.search(
1380             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1381             webpage)
1382         if mobj is not None:
1383             return self.url_result(mobj.group('url'), 'Yahoo')
1384
1385         # Look for embedded sbs.com.au player
1386         mobj = re.search(
1387             r'''(?x)
1388             (?:
1389                 <meta\s+property="og:video"\s+content=|
1390                 <iframe[^>]+?src=
1391             )
1392             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1393             webpage)
1394         if mobj is not None:
1395             return self.url_result(mobj.group('url'), 'SBS')
1396
1397         # Look for embedded Cinchcast player
1398         mobj = re.search(
1399             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1400             webpage)
1401         if mobj is not None:
1402             return self.url_result(mobj.group('url'), 'Cinchcast')
1403
1404         mobj = re.search(
1405             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1406             webpage)
1407         if not mobj:
1408             mobj = re.search(
1409                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1410                 webpage)
1411         if mobj is not None:
1412             return self.url_result(mobj.group('url'), 'MLB')
1413
1414         mobj = re.search(
1415             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1416             webpage)
1417         if mobj is not None:
1418             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1419
1420         mobj = re.search(
1421             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1422             webpage)
1423         if mobj is not None:
1424             return self.url_result(mobj.group('url'), 'Livestream')
1425
1426         # Look for Zapiks embed
1427         mobj = re.search(
1428             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1429         if mobj is not None:
1430             return self.url_result(mobj.group('url'), 'Zapiks')
1431
1432         # Look for Kaltura embeds
1433         mobj = re.search(
1434             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1435         if mobj is not None:
1436             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1437
1438         # Look for Eagle.Platform embeds
1439         mobj = re.search(
1440             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1441         if mobj is not None:
1442             return self.url_result(mobj.group('url'), 'EaglePlatform')
1443
1444         # Look for ClipYou (uses Eagle.Platform) embeds
1445         mobj = re.search(
1446             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1447         if mobj is not None:
1448             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1449
1450         # Look for Pladform embeds
1451         mobj = re.search(
1452             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1453         if mobj is not None:
1454             return self.url_result(mobj.group('url'), 'Pladform')
1455
1456         # Look for Playwire embeds
1457         mobj = re.search(
1458             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1459         if mobj is not None:
1460             return self.url_result(mobj.group('url'))
1461
1462         # Look for 5min embeds
1463         mobj = re.search(
1464             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1465         if mobj is not None:
1466             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1467
1468         # Look for Crooks and Liars embeds
1469         mobj = re.search(
1470             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1471         if mobj is not None:
1472             return self.url_result(mobj.group('url'))
1473
1474         # Look for NBC Sports VPlayer embeds
1475         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1476         if nbc_sports_url:
1477             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1478
1479         # Look for UDN embeds
1480         mobj = re.search(
1481             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1482         if mobj is not None:
1483             return self.url_result(
1484                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1485
1486         # Look for Senate ISVP iframe
1487         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1488         if senate_isvp_url:
1489             return self.url_result(senate_isvp_url, 'SenateISVP')
1490
1491         def check_video(vurl):
1492             if YoutubeIE.suitable(vurl):
1493                 return True
1494             vpath = compat_urlparse.urlparse(vurl).path
1495             vext = determine_ext(vpath)
1496             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1497
1498         def filter_video(urls):
1499             return list(filter(check_video, urls))
1500
1501         # Start with something easy: JW Player in SWFObject
1502         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1503         if not found:
1504             # Look for gorilla-vid style embedding
1505             found = filter_video(re.findall(r'''(?sx)
1506                 (?:
1507                     jw_plugins|
1508                     JWPlayerOptions|
1509                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1510                 )
1511                 .*?
1512                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1513         if not found:
1514             # Broaden the search a little bit
1515             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1516         if not found:
1517             # Broaden the findall a little bit: JWPlayer JS loader
1518             found = filter_video(re.findall(
1519                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1520         if not found:
1521             # Flow player
1522             found = filter_video(re.findall(r'''(?xs)
1523                 flowplayer\("[^"]+",\s*
1524                     \{[^}]+?\}\s*,
1525                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1526                         ["']?url["']?\s*:\s*["']([^"']+)["']
1527             ''', webpage))
1528         if not found:
1529             # Cinerama player
1530             found = re.findall(
1531                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1532         if not found:
1533             # Try to find twitter cards info
1534             found = filter_video(re.findall(
1535                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1536         if not found:
1537             # We look for Open Graph info:
1538             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1539             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1540             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1541             if m_video_type is not None:
1542                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1543         if not found:
1544             # HTML5 video
1545             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1546         if not found:
1547             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1548             found = re.search(
1549                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1550                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1551                 webpage)
1552             if not found:
1553                 # Look also in Refresh HTTP header
1554                 refresh_header = head_response.headers.get('Refresh')
1555                 if refresh_header:
1556                     found = re.search(REDIRECT_REGEX, refresh_header)
1557             if found:
1558                 new_url = compat_urlparse.urljoin(url, found.group(1))
1559                 self.report_following_redirect(new_url)
1560                 return {
1561                     '_type': 'url',
1562                     'url': new_url,
1563                 }
1564         if not found:
1565             raise UnsupportedError(url)
1566
1567         entries = []
1568         for video_url in found:
1569             video_url = compat_urlparse.urljoin(url, video_url)
1570             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1571
1572             # Sometimes, jwplayer extraction will result in a YouTube URL
1573             if YoutubeIE.suitable(video_url):
1574                 entries.append(self.url_result(video_url, 'Youtube'))
1575                 continue
1576
1577             # here's a fun little line of code for you:
1578             video_id = os.path.splitext(video_id)[0]
1579
1580             if determine_ext(video_url) == 'smil':
1581                 entries.append({
1582                     'id': video_id,
1583                     'formats': self._extract_smil_formats(video_url, video_id),
1584                     'uploader': video_uploader,
1585                     'title': video_title,
1586                     'age_limit': age_limit,
1587                 })
1588             else:
1589                 entries.append({
1590                     'id': video_id,
1591                     'url': video_url,
1592                     'uploader': video_uploader,
1593                     'title': video_title,
1594                     'age_limit': age_limit,
1595                 })
1596
1597         if len(entries) == 1:
1598             return entries[0]
1599         else:
1600             for num, e in enumerate(entries, start=1):
1601                 # 'url' results don't have a title
1602                 if e.get('title') is not None:
1603                     e['title'] = '%s (%d)' % (e['title'], num)
1604             return {
1605                 '_type': 'playlist',
1606                 'entries': entries,
1607             }