youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38
  39
  40 class GenericIE(InfoExtractor):
  41     IE_DESC = 'Generic downloader that works on some sites'
  42     _VALID_URL = r'.*'
  43     IE_NAME = 'generic'
  44     _TESTS = [
  45         {
  46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  48             'info_dict': {
  49                 'id': '13601338388002',
  50                 'ext': 'mp4',
  51                 'uploader': 'www.hodiho.fr',
  52                 'title': 'R\u00e9gis plante sa Jeep',
  53             }
  54         },
  55         # bandcamp page with custom domain
  56         {
  57             'add_ie': ['Bandcamp'],
  58             'url': 'http://bronyrock.com/track/the-pony-mash',
  59             'info_dict': {
  60                 'id': '3235767654',
  61                 'ext': 'mp3',
  62                 'title': 'The Pony Mash',
  63                 'uploader': 'M_Pallante',
  64             },
  65             'skip': 'There is a limit of 200 free downloads / month for the test song',
  66         },
  67         # embedded brightcove video
  68         # it also tests brightcove videos that need to set the 'Referer' in the
  69         # http requests
  70         {
  71             'add_ie': ['Brightcove'],
  72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  73             'info_dict': {
  74                 'id': '2765128793001',
  75                 'ext': 'mp4',
  76                 'title': 'Le cours de bourse : l’analyse technique',
  77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  78                 'uploader': 'BFM BUSINESS',
  79             },
  80             'params': {
  81                 'skip_download': True,
  82             },
  83         },
  84         {
  85             # https://github.com/rg3/youtube-dl/issues/2253
  86             'url': 'http://bcove.me/i6nfkrc3',
  87             'md5': '0ba9446db037002366bab3b3eb30c88c',
  88             'info_dict': {
  89                 'id': '3101154703001',
  90                 'ext': 'mp4',
  91                 'title': 'Still no power',
  92                 'uploader': 'thestar.com',
  93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  94             },
  95             'add_ie': ['Brightcove'],
  96         },
  97         {
  98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
  99             'md5': 'fb973ecf6e4a78a67453647444222983',
 100             'info_dict': {
 101                 'id': '3414141473001',
 102                 'ext': 'mp4',
 103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 105                 'uploader': 'Championat',
 106             },
 107         },
 108         {
 109             # https://github.com/rg3/youtube-dl/issues/3541
 110             'add_ie': ['Brightcove'],
 111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 112             'info_dict': {
 113                 'id': '3866516442001',
 114                 'ext': 'mp4',
 115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 117                 'uploader': 'SBS Broadcasting',
 118             },
 119             'skip': 'Restricted to Netherlands',
 120             'params': {
 121                 'skip_download': True,  # m3u8 download
 122             },
 123         },
 124         # Direct link to a video
 125         {
 126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 128             'info_dict': {
 129                 'id': 'trailer',
 130                 'ext': 'mp4',
 131                 'title': 'trailer',
 132                 'upload_date': '20100513',
 133             }
 134         },
 135         # ooyala video
 136         {
 137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 139             'info_dict': {
 140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 141                 'ext': 'mp4',
 142                 'title': '2cc213299525360.mov',  # that's what we get
 143             },
 144             'add_ie': ['Ooyala'],
 145         },
 146         # multiple ooyala embeds on SBN network websites
 147         {
 148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 149             'info_dict': {
 150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 152             },
 153             'playlist_mincount': 3,
 154             'params': {
 155                 'skip_download': True,
 156             },
 157             'add_ie': ['Ooyala'],
 158         },
 159         # google redirect
 160         {
 161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 162             'info_dict': {
 163                 'id': 'cmQHVoWB5FY',
 164                 'ext': 'mp4',
 165                 'upload_date': '20130224',
 166                 'uploader_id': 'TheVerge',
 167                 'description': 're:^Chris Ziegler takes a look at the\.*',
 168                 'uploader': 'The Verge',
 169                 'title': 'First Firefox OS phones side-by-side',
 170             },
 171             'params': {
 172                 'skip_download': False,
 173             }
 174         },
 175         # embed.ly video
 176         {
 177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 178             'info_dict': {
 179                 'id': '9ODmcdjQcHQ',
 180                 'ext': 'mp4',
 181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 182                 'upload_date': '20140225',
 183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 184                 'uploader': 'Tested',
 185                 'uploader_id': 'testedcom',
 186             },
 187             # No need to test YoutubeIE here
 188             'params': {
 189                 'skip_download': True,
 190             },
 191         },
 192         # funnyordie embed
 193         {
 194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 195             'info_dict': {
 196                 'id': '18e820ec3f',
 197                 'ext': 'mp4',
 198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 200             },
 201         },
 202         # BBC iPlayer embeds
 203         {
 204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 205             'info_dict': {
 206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 207             },
 208             'playlist_mincount': 18,
 209         },
 210         # RUTV embed
 211         {
 212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 213             'info_dict': {
 214                 'id': '776940',
 215                 'ext': 'mp4',
 216                 'title': 'Охотское море стало целиком российским',
 217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 218             },
 219             'params': {
 220                 # m3u8 download
 221                 'skip_download': True,
 222             },
 223         },
 224         # Embedded TED video
 225         {
 226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 227             'md5': '65fdff94098e4a607385a60c5177c638',
 228             'info_dict': {
 229                 'id': '1969',
 230                 'ext': 'mp4',
 231                 'title': 'Hidden miracles of the natural world',
 232                 'uploader': 'Louie Schwartzberg',
 233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 234             }
 235         },
  236         # Embedded Ustream video
 237         {
 238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 239             'md5': '27b99cdb639c9b12a79bca876a073417',
 240             'info_dict': {
 241                 'id': '45734260',
 242                 'ext': 'flv',
 243                 'uploader': 'AU SPA:  The NSA and Privacy',
 244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 245             }
 246         },
 247         # nowvideo embed hidden behind percent encoding
 248         {
 249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 251             'info_dict': {
 252                 'id': '06e53103ca9aa',
 253                 'ext': 'flv',
 254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 255                 'description': 'No description',
 256             },
 257         },
 258         # arte embed
 259         {
 260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 261             'md5': '7653032cbb25bf6c80d80f217055fa43',
 262             'info_dict': {
 263                 'id': '048195-004_PLUS7-F',
 264                 'ext': 'flv',
 265                 'title': 'X:enius',
 266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 267                 'upload_date': '20140320',
 268             },
 269             'params': {
 270                 'skip_download': 'Requires rtmpdump'
 271             }
 272         },
 273         # Condé Nast embed
 274         {
 275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 277             'info_dict': {
 278                 'id': '53501be369702d3275860000',
 279                 'ext': 'mp4',
 280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 281             }
 282         },
 283         # Dailymotion embed
 284         {
 285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 287             'info_dict': {
 288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 289                 'ext': 'mp4',
 290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 291                 'uploader': 'Spi0n',
 292             },
 293             'add_ie': ['Dailymotion'],
 294         },
 295         # YouTube embed
 296         {
 297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 298             'info_dict': {
 299                 'id': 'FXRb4ykk4S0',
 300                 'ext': 'mp4',
 301                 'title': 'The NBL Auction 2014',
 302                 'uploader': 'BADMINTON England',
 303                 'uploader_id': 'BADMINTONEvents',
 304                 'upload_date': '20140603',
 305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 306             },
 307             'add_ie': ['Youtube'],
 308             'params': {
 309                 'skip_download': True,
 310             }
 311         },
  312         # MTVServices embed
 313         {
 314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 315             'md5': '35727f82f58c76d996fc188f9755b0d5',
 316             'info_dict': {
 317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 318                 'ext': 'mp4',
 319                 'title': 'Review',
 320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 321             },
 322         },
 323         # YouTube embed via <data-embed-url="">
 324         {
 325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 326             'info_dict': {
 327                 'id': '4vAffPZIT44',
 328                 'ext': 'mp4',
 329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 330                 'uploader': 'Gameloft',
 331                 'uploader_id': 'gameloft',
 332                 'upload_date': '20140828',
 333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 334             },
 335             'params': {
 336                 'skip_download': True,
 337             }
 338         },
 339         # Camtasia studio
 340         {
 341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 342             'playlist': [{
 343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 344                 'info_dict': {
 345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 347                     'ext': 'flv',
 348                     'duration': 2235.90,
 349                 }
 350             }, {
 351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 352                 'info_dict': {
 353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 355                     'ext': 'flv',
 356                     'duration': 2235.93,
 357                 }
 358             }],
 359             'info_dict': {
 360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 361             }
 362         },
 363         # Flowplayer
 364         {
 365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 366             'md5': '9d65602bf31c6e20014319c7d07fba27',
 367             'info_dict': {
 368                 'id': '5123ea6d5e5a7',
 369                 'ext': 'mp4',
 370                 'age_limit': 18,
 371                 'uploader': 'www.handjobhub.com',
 372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 373             }
 374         },
 375         # RSS feed
 376         {
 377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 378             'info_dict': {
 379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 380                 'title': 'Zero Punctuation',
 381                 'description': 're:.*groundbreaking video review series.*'
 382             },
 383             'playlist_mincount': 11,
 384         },
 385         # Multiple brightcove videos
 386         # https://github.com/rg3/youtube-dl/issues/2283
 387         {
 388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 389             'info_dict': {
 390                 'id': 'always-never',
 391                 'title': 'Always / Never - The New Yorker',
 392             },
 393             'playlist_count': 3,
 394             'params': {
 395                 'extract_flat': False,
 396                 'skip_download': True,
 397             }
 398         },
 399         # MLB embed
 400         {
 401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 402             'md5': '96f09a37e44da40dd083e12d9a683327',
 403             'info_dict': {
 404                 'id': '33322633',
 405                 'ext': 'mp4',
 406                 'title': 'Ump changes call to ball',
 407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 408                 'duration': 48,
 409                 'timestamp': 1401537900,
 410                 'upload_date': '20140531',
 411                 'thumbnail': 're:^https?://.*\.jpg$',
 412             },
 413         },
 414         # Wistia embed
 415         {
 416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 417             'md5': '8788b683c777a5cf25621eaf286d0c23',
 418             'info_dict': {
 419                 'id': '1cfaf6b7ea',
 420                 'ext': 'mov',
 421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 422                 'duration': 643.0,
 423                 'filesize': 182808282,
 424                 'uploader': 'education-portal.com',
 425             },
 426         },
 427         {
 428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 430             'info_dict': {
 431                 'id': 'uxjb0lwrcz',
 432                 'ext': 'mp4',
 433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 434                 'duration': 1715.0,
 435                 'uploader': 'thoughtworks.wistia.com',
 436             },
 437         },
 438         # Direct download with broken HEAD
 439         {
 440             'url': 'http://ai-radio.org:8000/radio.opus',
 441             'info_dict': {
 442                 'id': 'radio',
 443                 'ext': 'opus',
 444                 'title': 'radio',
 445             },
 446             'params': {
 447                 'skip_download': True,  # infinite live stream
 448             },
 449             'expected_warnings': [
 450                 r'501.*Not Implemented'
 451             ],
 452         },
 453         # Soundcloud embed
 454         {
 455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 456             'info_dict': {
 457                 'id': '174391317',
 458                 'ext': 'mp3',
 459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 460                 'uploader': 'Sophos Security',
 461                 'title': 'Chet Chat 171 - Oct 29, 2014',
 462                 'upload_date': '20141029',
 463             }
 464         },
 465         # Livestream embed
 466         {
 467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 468             'info_dict': {
 469                 'id': '67864563',
 470                 'ext': 'flv',
 471                 'upload_date': '20141112',
 472                 'title': 'Rosetta #CometLanding webcast HL 10',
 473             }
 474         },
 475         # LazyYT
 476         {
 477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 478             'info_dict': {
 479                 'id': '1986',
 480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 481             },
 482             'playlist_mincount': 2,
 483         },
 484         # Direct link with incorrect MIME type
 485         {
 486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 488             'info_dict': {
 489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 490                 'id': '5_Lennart_Poettering_-_Systemd',
 491                 'ext': 'webm',
 492                 'title': '5_Lennart_Poettering_-_Systemd',
 493                 'upload_date': '20141120',
 494             },
 495             'expected_warnings': [
 496                 'URL could be a direct video link, returning it as such.'
 497             ]
 498         },
 499         # Cinchcast embed
 500         {
 501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 502             'info_dict': {
 503                 'id': '7141703',
 504                 'ext': 'mp3',
 505                 'upload_date': '20141126',
 506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 507             }
 508         },
 509         # Cinerama player
 510         {
 511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 512             'info_dict': {
 513                 'id': '730m_DandD_1901_512k',
 514                 'ext': 'mp4',
 515                 'uploader': 'www.abc.net.au',
 516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 517             }
 518         },
 519         # embedded viddler video
 520         {
 521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 522             'info_dict': {
 523                 'id': '4d03aad9',
 524                 'ext': 'mp4',
 525                 'uploader': 'deadspin',
 526                 'title': 'WALL-TO-GORTAT',
 527                 'timestamp': 1422285291,
 528                 'upload_date': '20150126',
 529             },
 530             'add_ie': ['Viddler'],
 531         },
 532         # Libsyn embed
 533         {
 534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 535             'info_dict': {
 536                 'id': '3377616',
 537                 'ext': 'mp3',
 538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 540                 'upload_date': '20150220',
 541             },
 542         },
 543         # jwplayer YouTube
 544         {
 545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 546             'info_dict': {
 547                 'id': 'Mrj4DVp2zeA',
 548                 'ext': 'mp4',
 549                 'upload_date': '20150212',
 550                 'uploader': 'The National Archives UK',
 551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 552                 'uploader_id': 'NationalArchives08',
 553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 554             },
 555         },
 556         # rtl.nl embed
 557         {
 558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 559             'playlist_mincount': 5,
 560             'info_dict': {
 561                 'id': 'aanslagen-kopenhagen',
 562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 563             }
 564         },
 565         # Zapiks embed
 566         {
 567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 568             'info_dict': {
 569                 'id': '118046',
 570                 'ext': 'mp4',
 571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 572             }
 573         },
 574         # Kaltura embed
 575         {
 576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 577             'info_dict': {
 578                 'id': '1_eergr3h1',
 579                 'ext': 'mp4',
 580                 'upload_date': '20150226',
 581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 582                 'timestamp': int,
 583                 'title': 'John Carlson Postgame 2/25/15',
 584             },
 585         },
 586         # Eagle.Platform embed (generic URL)
 587         {
 588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 589             'info_dict': {
 590                 'id': '227304',
 591                 'ext': 'mp4',
 592                 'title': 'Навальный вышел на свободу',
 593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 594                 'thumbnail': 're:^https?://.*\.jpg$',
 595                 'duration': 87,
 596                 'view_count': int,
 597                 'age_limit': 0,
 598             },
 599         },
 600         # ClipYou (Eagle.Platform) embed (custom URL)
 601         {
 602             'url': 'http://muz-tv.ru/play/7129/',
 603             'info_dict': {
 604                 'id': '12820',
 605                 'ext': 'mp4',
 606                 'title': "'O Sole Mio",
 607                 'thumbnail': 're:^https?://.*\.jpg$',
 608                 'duration': 216,
 609                 'view_count': int,
 610             },
 611         },
 612         # Pladform embed
 613         {
 614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 615             'info_dict': {
 616                 'id': '100183293',
 617                 'ext': 'mp4',
 618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
 619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 620                 'thumbnail': 're:^https?://.*\.jpg$',
 621                 'duration': 694,
 622                 'age_limit': 0,
 623             },
 624         },
 625         # 5min embed
 626         {
 627             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 628             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 629             'info_dict': {
 630                 'id': '518726732',
 631                 'ext': 'mp4',
 632                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 633             },
 634         },
 635         # RSS feed with enclosure
 636         {
 637             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 638             'info_dict': {
 639                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 640                 'ext': 'm4v',
 641                 'upload_date': '20150228',
 642                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 643             }
 644         },
 645         # NBC Sports vplayer embed
 646         {
 647             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 648             'info_dict': {
 649                 'id': 'ln7x1qSThw4k',
 650                 'ext': 'flv',
 651                 'title': "PFT Live: New leader in the 'new-look' defense",
 652                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 653             },
 654         },
 655         # UDN embed
 656         {
 657             'url': 'http://www.udn.com/news/story/7314/822787',
 658             'md5': 'de06b4c90b042c128395a88f0384817e',
 659             'info_dict': {
 660                 'id': '300040',
 661                 'ext': 'mp4',
 662                 'title': '生物老師男變女 全校挺"做自己"',
 663                 'thumbnail': 're:^https?://.*\.jpg$',
 664             }
 665         }
 666     ]
 667
 668     def report_following_redirect(self, new_url):
 669         """Report information extraction."""
 670         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 671
 672     def _extract_rss(self, url, video_id, doc):
 673         playlist_title = doc.find('./channel/title').text
 674         playlist_desc_el = doc.find('./channel/description')
 675         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 676
 677         entries = []
 678         for it in doc.findall('./channel/item'):
 679             next_url = xpath_text(it, 'link', fatal=False)
 680             if not next_url:
 681                 enclosure_nodes = it.findall('./enclosure')
 682                 for e in enclosure_nodes:
 683                     next_url = e.attrib.get('url')
 684                     if next_url:
 685                         break
 686
 687             if not next_url:
 688                 continue
 689
 690             entries.append({
 691                 '_type': 'url',
 692                 'url': next_url,
 693                 'title': it.find('title').text,
 694             })
 695
 696         return {
 697             '_type': 'playlist',
 698             'id': url,
 699             'title': playlist_title,
 700             'description': playlist_desc,
 701             'entries': entries,
 702         }
 703
 704     def _extract_camtasia(self, url, video_id, webpage):
 705         """ Returns None if no camtasia video can be found. """
 706
 707         camtasia_cfg = self._search_regex(
 708             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 709             webpage, 'camtasia configuration file', default=None)
 710         if camtasia_cfg is None:
 711             return None
 712
 713         title = self._html_search_meta('DC.title', webpage, fatal=True)
 714
 715         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 716         camtasia_cfg = self._download_xml(
 717             camtasia_url, video_id,
 718             note='Downloading camtasia configuration',
 719             errnote='Failed to download camtasia configuration')
 720         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 721
 722         entries = []
 723         for n in fileset_node.getchildren():
 724             url_n = n.find('./uri')
 725             if url_n is None:
 726                 continue
 727
 728             entries.append({
 729                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 730                 'title': '%s - %s' % (title, n.tag),
 731                 'url': compat_urlparse.urljoin(url, url_n.text),
 732                 'duration': float_or_none(n.find('./duration').text),
 733             })
 734
 735         return {
 736             '_type': 'playlist',
 737             'entries': entries,
 738             'title': title,
 739         }
 740
 741     def _real_extract(self, url):
 742         if url.startswith('//'):
 743             return {
 744                 '_type': 'url',
 745                 'url': self.http_scheme() + url,
 746             }
 747
 748         parsed_url = compat_urlparse.urlparse(url)
 749         if not parsed_url.scheme:
 750             default_search = self._downloader.params.get('default_search')
 751             if default_search is None:
 752                 default_search = 'fixup_error'
 753
 754             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 755                 if '/' in url:
 756                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 757                     return self.url_result('http://' + url)
 758                 elif default_search != 'fixup_error':
 759                     if default_search == 'auto_warning':
 760                         if re.match(r'^(?:url|URL)$', url):
 761                             raise ExtractorError(
 762                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 763                                 expected=True)
 764                         else:
 765                             self._downloader.report_warning(
 766                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 767                     return self.url_result('ytsearch:' + url)
 768
 769             if default_search in ('error', 'fixup_error'):
 770                 raise ExtractorError(
 771                     '%r is not a valid URL. '
 772                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 773                     % (url, url), expected=True)
 774             else:
 775                 if ':' not in default_search:
 776                     default_search += ':'
 777                 return self.url_result(default_search + url)
 778
 779         url, smuggled_data = unsmuggle_url(url)
 780         force_videoid = None
 781         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 782         if smuggled_data and 'force_videoid' in smuggled_data:
 783             force_videoid = smuggled_data['force_videoid']
 784             video_id = force_videoid
 785         else:
 786             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 787
 788         self.to_screen('%s: Requesting header' % video_id)
 789
 790         head_req = HEADRequest(url)
 791         head_response = self._request_webpage(
 792             head_req, video_id,
 793             note=False, errnote='Could not send HEAD request to %s' % url,
 794             fatal=False)
 795
 796         if head_response is not False:
 797             # Check for redirect
 798             new_url = head_response.geturl()
 799             if url != new_url:
 800                 self.report_following_redirect(new_url)
 801                 if force_videoid:
 802                     new_url = smuggle_url(
 803                         new_url, {'force_videoid': force_videoid})
 804                 return self.url_result(new_url)
 805
 806         full_response = None
 807         if head_response is False:
 808             full_response = self._request_webpage(url, video_id)
 809             head_response = full_response
 810
 811         # Check for direct link to a video
 812         content_type = head_response.headers.get('Content-Type', '')
 813         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 814         if m:
 815             upload_date = unified_strdate(
 816                 head_response.headers.get('Last-Modified'))
 817             return {
 818                 'id': video_id,
 819                 'title': os.path.splitext(url_basename(url))[0],
 820                 'direct': True,
 821                 'formats': [{
 822                     'format_id': m.group('format_id'),
 823                     'url': url,
 824                     'vcodec': 'none' if m.group('type') == 'audio' else None
 825                 }],
 826                 'upload_date': upload_date,
 827             }
 828
 829         if not self._downloader.params.get('test', False) and not is_intentional:
 830             self._downloader.report_warning('Falling back on generic information extractor.')
 831
 832         if not full_response:
 833             full_response = self._request_webpage(url, video_id)
 834
 835         # Maybe it's a direct link to a video?
 836         # Be careful not to download the whole thing!
 837         first_bytes = full_response.read(512)
 838         if not is_html(first_bytes):
 839             self._downloader.report_warning(
 840                 'URL could be a direct video link, returning it as such.')
 841             upload_date = unified_strdate(
 842                 head_response.headers.get('Last-Modified'))
 843             return {
 844                 'id': video_id,
 845                 'title': os.path.splitext(url_basename(url))[0],
 846                 'direct': True,
 847                 'url': url,
 848                 'upload_date': upload_date,
 849             }
 850
 851         webpage = self._webpage_read_content(
 852             full_response, url, video_id, prefix=first_bytes)
 853
 854         self.report_extraction(video_id)
 855
 856         # Is it an RSS feed?
 857         try:
 858             doc = parse_xml(webpage)
 859             if doc.tag == 'rss':
 860                 return self._extract_rss(url, video_id, doc)
 861         except compat_xml_parse_error:
 862             pass
 863
 864         # Is it a Camtasia project?
 865         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 866         if camtasia_res is not None:
 867             return camtasia_res
 868
 869         # Sometimes embedded video player is hidden behind percent encoding
 870         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 871         # Unescaping the whole page allows to handle those cases in a generic way
 872         webpage = compat_urllib_parse.unquote(webpage)
 873
 874         # it's tempting to parse this further, but you would
 875         # have to take into account all the variations like
 876         #   Video Title - Site Name
 877         #   Site Name | Video Title
 878         #   Video Title - Tagline | Site Name
 879         # and so on and so forth; it's just not practical
 880         video_title = self._html_search_regex(
 881             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 882             default='video')
 883
 884         # Try to detect age limit automatically
 885         age_limit = self._rta_search(webpage)
 886         # And then there are the jokers who advertise that they use RTA,
 887         # but actually don't.
 888         AGE_LIMIT_MARKERS = [
 889             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 890         ]
 891         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 892             age_limit = 18
 893
 894         # video uploader is domain name
 895         video_uploader = self._search_regex(
 896             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 897
 898         # Helper method
 899         def _playlist_from_matches(matches, getter=None, ie=None):
 900             urlrs = orderedSet(
 901                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 902                 for m in matches)
 903             return self.playlist_result(
 904                 urlrs, playlist_id=video_id, playlist_title=video_title)
 905
 906         # Look for BrightCove:
 907         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 908         if bc_urls:
 909             self.to_screen('Brightcove video detected.')
 910             entries = [{
 911                 '_type': 'url',
 912                 'url': smuggle_url(bc_url, {'Referer': url}),
 913                 'ie_key': 'Brightcove'
 914             } for bc_url in bc_urls]
 915
 916             return {
 917                 '_type': 'playlist',
 918                 'title': video_title,
 919                 'id': video_id,
 920                 'entries': entries,
 921             }
 922
 923         # Look for embedded rtl.nl player
 924         matches = re.findall(
 925             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
 926             webpage)
 927         if matches:
 928             return _playlist_from_matches(matches, ie='RtlNl')
 929
 930         # Look for embedded (iframe) Vimeo player
 931         mobj = re.search(
 932             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
 933         if mobj:
 934             player_url = unescapeHTML(mobj.group('url'))
 935             surl = smuggle_url(player_url, {'Referer': url})
 936             return self.url_result(surl)
 937         # Look for embedded (swf embed) Vimeo player
 938         mobj = re.search(
 939             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
 940         if mobj:
 941             return self.url_result(mobj.group(1))
 942
 943         # Look for embedded YouTube player
 944         matches = re.findall(r'''(?x)
 945             (?:
 946                 <iframe[^>]+?src=|
 947                 data-video-url=|
 948                 <embed[^>]+?src=|
 949                 embedSWF\(?:\s*|
 950                 new\s+SWFObject\(
 951             )
 952             (["\'])
 953                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
 954                 (?:embed|v|p)/.+?)
 955             \1''', webpage)
 956         if matches:
 957             return _playlist_from_matches(
 958                 matches, lambda m: unescapeHTML(m[1]))
 959
 960         # Look for lazyYT YouTube embed
 961         matches = re.findall(
 962             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 963         if matches:
 964             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
 965
 966         # Look for embedded Dailymotion player
 967         matches = re.findall(
 968             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
 969         if matches:
 970             return _playlist_from_matches(
 971                 matches, lambda m: unescapeHTML(m[1]))
 972
 973         # Look for embedded Dailymotion playlist player (#3822)
 974         m = re.search(
 975             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
 976         if m:
 977             playlists = re.findall(
 978                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
 979             if playlists:
 980                 return _playlist_from_matches(
 981                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
 982
 983         # Look for embedded Wistia player
 984         match = re.search(
 985             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
 986         if match:
 987             embed_url = self._proto_relative_url(
 988                 unescapeHTML(match.group('url')))
 989             return {
 990                 '_type': 'url_transparent',
 991                 'url': embed_url,
 992                 'ie_key': 'Wistia',
 993                 'uploader': video_uploader,
 994                 'title': video_title,
 995                 'id': video_id,
 996             }
 997
 998         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
 999         if match:
1000             return {
1001                 '_type': 'url_transparent',
1002                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1003                 'ie_key': 'Wistia',
1004                 'uploader': video_uploader,
1005                 'title': video_title,
1006                 'id': match.group('id')
1007             }
1008
1009         # Look for embedded blip.tv player
1010         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1011         if mobj:
1012             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1013         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1014         if mobj:
1015             return self.url_result(mobj.group(1), 'BlipTV')
1016
1017         # Look for embedded condenast player
1018         matches = re.findall(
1019             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1020             webpage)
1021         if matches:
1022             return {
1023                 '_type': 'playlist',
1024                 'entries': [{
1025                     '_type': 'url',
1026                     'ie_key': 'CondeNast',
1027                     'url': ma,
1028                 } for ma in matches],
1029                 'title': video_title,
1030                 'id': video_id,
1031             }
1032
1033         # Look for Bandcamp pages with custom domain
1034         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1035         if mobj is not None:
1036             burl = unescapeHTML(mobj.group(1))
1037             # Don't set the extractor because it can be a track url or an album
1038             return self.url_result(burl)
1039
1040         # Look for embedded Vevo player
1041         mobj = re.search(
1042             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1043         if mobj is not None:
1044             return self.url_result(mobj.group('url'))
1045
1046         # Look for embedded Viddler player
1047         mobj = re.search(
1048             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1049             webpage)
1050         if mobj is not None:
1051             return self.url_result(mobj.group('url'))
1052
1053         # Look for NYTimes player
1054         mobj = re.search(
1055             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1056             webpage)
1057         if mobj is not None:
1058             return self.url_result(mobj.group('url'))
1059
1060         # Look for Libsyn player
1061         mobj = re.search(
1062             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1063         if mobj is not None:
1064             return self.url_result(mobj.group('url'))
1065
1066         # Look for Ooyala videos
1067         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1068                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1069                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
1070         if mobj is not None:
1071             return OoyalaIE._build_url_result(mobj.group('ec'))
1072
1073         # Look for multiple Ooyala embeds on SBN network websites
1074         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1075         if mobj is not None:
1076             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1077             if embeds:
1078                 return _playlist_from_matches(
1079                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1080
1081         # Look for Aparat videos
1082         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1083         if mobj is not None:
1084             return self.url_result(mobj.group(1), 'Aparat')
1085
1086         # Look for MPORA videos
1087         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1088         if mobj is not None:
1089             return self.url_result(mobj.group(1), 'Mpora')
1090
1091         # Look for embedded NovaMov-based player
1092         mobj = re.search(
1093             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1094                     (?P<url>http://(?:(?:embed|www)\.)?
1095                         (?:novamov\.com|
1096                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1097                            videoweed\.(?:es|com)|
1098                            movshare\.(?:net|sx|ag)|
1099                            divxstage\.(?:eu|net|ch|co|at|ag))
1100                         /embed\.php.+?)\1''', webpage)
1101         if mobj is not None:
1102             return self.url_result(mobj.group('url'))
1103
1104         # Look for embedded Facebook player
1105         mobj = re.search(
1106             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1107         if mobj is not None:
1108             return self.url_result(mobj.group('url'), 'Facebook')
1109
1110         # Look for embedded VK player
1111         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1112         if mobj is not None:
1113             return self.url_result(mobj.group('url'), 'VK')
1114
1115         # Look for embedded ivi player
1116         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1117         if mobj is not None:
1118             return self.url_result(mobj.group('url'), 'Ivi')
1119
1120         # Look for embedded Huffington Post player
1121         mobj = re.search(
1122             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1123         if mobj is not None:
1124             return self.url_result(mobj.group('url'), 'HuffPost')
1125
1126         # Look for embed.ly
1127         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1128         if mobj is not None:
1129             return self.url_result(mobj.group('url'))
1130         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1131         if mobj is not None:
1132             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1133
1134         # Look for funnyordie embed
1135         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1136         if matches:
1137             return _playlist_from_matches(
1138                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1139
1140         # Look for BBC iPlayer embed
1141         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1142         if matches:
1143             return _playlist_from_matches(matches, ie='BBCCoUk')
1144
1145         # Look for embedded RUTV player
1146         rutv_url = RUTVIE._extract_url(webpage)
1147         if rutv_url:
1148             return self.url_result(rutv_url, 'RUTV')
1149
1150         # Look for embedded TED player
1151         mobj = re.search(
1152             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group('url'), 'TED')
1155
1156         # Look for embedded Ustream videos
1157         mobj = re.search(
1158             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1159         if mobj is not None:
1160             return self.url_result(mobj.group('url'), 'Ustream')
1161
1162         # Look for embedded arte.tv player
1163         mobj = re.search(
1164             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1165             webpage)
1166         if mobj is not None:
1167             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1168
1169         # Look for embedded smotri.com player
1170         smotri_url = SmotriIE._extract_url(webpage)
1171         if smotri_url:
1172             return self.url_result(smotri_url, 'Smotri')
1173
1174         # Look for embeded soundcloud player
1175         mobj = re.search(
1176             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1177             webpage)
1178         if mobj is not None:
1179             url = unescapeHTML(mobj.group('url'))
1180             return self.url_result(url)
1181
1182         # Look for embedded vulture.com player
1183         mobj = re.search(
1184             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1185             webpage)
1186         if mobj is not None:
1187             url = unescapeHTML(mobj.group('url'))
1188             return self.url_result(url, ie='Vulture')
1189
1190         # Look for embedded mtvservices player
1191         mobj = re.search(
1192             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1193             webpage)
1194         if mobj is not None:
1195             url = unescapeHTML(mobj.group('url'))
1196             return self.url_result(url, ie='MTVServicesEmbedded')
1197
1198         # Look for embedded yahoo player
1199         mobj = re.search(
1200             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1201             webpage)
1202         if mobj is not None:
1203             return self.url_result(mobj.group('url'), 'Yahoo')
1204
1205         # Look for embedded sbs.com.au player
1206         mobj = re.search(
1207             r'''(?x)
1208             (?:
1209                 <meta\s+property="og:video"\s+content=|
1210                 <iframe[^>]+?src=
1211             )
1212             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1213             webpage)
1214         if mobj is not None:
1215             return self.url_result(mobj.group('url'), 'SBS')
1216
1217         # Look for embedded Cinchcast player
1218         mobj = re.search(
1219             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1220             webpage)
1221         if mobj is not None:
1222             return self.url_result(mobj.group('url'), 'Cinchcast')
1223
1224         mobj = re.search(
1225             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1226             webpage)
1227         if mobj is not None:
1228             return self.url_result(mobj.group('url'), 'MLB')
1229
1230         mobj = re.search(
1231             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1232             webpage)
1233         if mobj is not None:
1234             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1235
1236         mobj = re.search(
1237             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1238             webpage)
1239         if mobj is not None:
1240             return self.url_result(mobj.group('url'), 'Livestream')
1241
1242         # Look for Zapiks embed
1243         mobj = re.search(
1244             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1245         if mobj is not None:
1246             return self.url_result(mobj.group('url'), 'Zapiks')
1247
1248         # Look for Kaltura embeds
1249         mobj = re.search(
1250             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1251         if mobj is not None:
1252             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1253
1254         # Look for Eagle.Platform embeds
1255         mobj = re.search(
1256             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1257         if mobj is not None:
1258             return self.url_result(mobj.group('url'), 'EaglePlatform')
1259
1260         # Look for ClipYou (uses Eagle.Platform) embeds
1261         mobj = re.search(
1262             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1263         if mobj is not None:
1264             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1265
1266         # Look for Pladform embeds
1267         mobj = re.search(
1268             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1269         if mobj is not None:
1270             return self.url_result(mobj.group('url'), 'Pladform')
1271
1272         # Look for 5min embeds
1273         mobj = re.search(
1274             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1275         if mobj is not None:
1276             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1277
1278         # Look for NBC Sports VPlayer embeds
1279         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1280         if nbc_sports_url:
1281             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1282
1283         # Look for UDN embeds
1284         mobj = re.search(
1285             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1286         if mobj is not None:
1287             return self.url_result(
1288                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1289
1290         def check_video(vurl):
1291             if YoutubeIE.suitable(vurl):
1292                 return True
1293             vpath = compat_urlparse.urlparse(vurl).path
1294             vext = determine_ext(vpath)
1295             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1296
1297         def filter_video(urls):
1298             return list(filter(check_video, urls))
1299
1300         # Start with something easy: JW Player in SWFObject
1301         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1302         if not found:
1303             # Look for gorilla-vid style embedding
1304             found = filter_video(re.findall(r'''(?sx)
1305                 (?:
1306                     jw_plugins|
1307                     JWPlayerOptions|
1308                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1309                 )
1310                 .*?
1311                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1312         if not found:
1313             # Broaden the search a little bit
1314             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1315         if not found:
1316             # Broaden the findall a little bit: JWPlayer JS loader
1317             found = filter_video(re.findall(
1318                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1319         if not found:
1320             # Flow player
1321             found = filter_video(re.findall(r'''(?xs)
1322                 flowplayer\("[^"]+",\s*
1323                     \{[^}]+?\}\s*,
1324                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1325                         ["']?url["']?\s*:\s*["']([^"']+)["']
1326             ''', webpage))
1327         if not found:
1328             # Cinerama player
1329             found = re.findall(
1330                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1331         if not found:
1332             # Try to find twitter cards info
1333             found = filter_video(re.findall(
1334                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1335         if not found:
1336             # We look for Open Graph info:
1337             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1338             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1339             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1340             if m_video_type is not None:
1341                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1342         if not found:
1343             # HTML5 video
1344             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1345         if not found:
1346             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1347             found = re.search(
1348                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1349                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1350                 webpage)
1351             if not found:
1352                 # Look also in Refresh HTTP header
1353                 refresh_header = head_response.headers.get('Refresh')
1354                 if refresh_header:
1355                     found = re.search(REDIRECT_REGEX, refresh_header)
1356             if found:
1357                 new_url = found.group(1)
1358                 self.report_following_redirect(new_url)
1359                 return {
1360                     '_type': 'url',
1361                     'url': new_url,
1362                 }
1363         if not found:
1364             raise UnsupportedError(url)
1365
1366         entries = []
1367         for video_url in found:
1368             video_url = compat_urlparse.urljoin(url, video_url)
1369             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1370
1371             # Sometimes, jwplayer extraction will result in a YouTube URL
1372             if YoutubeIE.suitable(video_url):
1373                 entries.append(self.url_result(video_url, 'Youtube'))
1374                 continue
1375
1376             # here's a fun little line of code for you:
1377             video_id = os.path.splitext(video_id)[0]
1378
1379             entries.append({
1380                 'id': video_id,
1381                 'url': video_url,
1382                 'uploader': video_uploader,
1383                 'title': video_title,
1384                 'age_limit': age_limit,
1385             })
1386
1387         if len(entries) == 1:
1388             return entries[0]
1389         else:
1390             for num, e in enumerate(entries, start=1):
1391                 # 'url' results don't have a title
1392                 if e.get('title') is not None:
1393                     e['title'] = '%s (%d)' % (e['title'], num)
1394             return {
1395                 '_type': 'playlist',
1396                 'entries': entries,
1397             }