youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38
  39
  40 class GenericIE(InfoExtractor):
  41     IE_DESC = 'Generic downloader that works on some sites'
  42     _VALID_URL = r'.*'
  43     IE_NAME = 'generic'
  44     _TESTS = [
  45         {
  46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  48             'info_dict': {
  49                 'id': '13601338388002',
  50                 'ext': 'mp4',
  51                 'uploader': 'www.hodiho.fr',
  52                 'title': 'R\u00e9gis plante sa Jeep',
  53             }
  54         },
  55         # bandcamp page with custom domain
  56         {
  57             'add_ie': ['Bandcamp'],
  58             'url': 'http://bronyrock.com/track/the-pony-mash',
  59             'info_dict': {
  60                 'id': '3235767654',
  61                 'ext': 'mp3',
  62                 'title': 'The Pony Mash',
  63                 'uploader': 'M_Pallante',
  64             },
  65             'skip': 'There is a limit of 200 free downloads / month for the test song',
  66         },
  67         # embedded brightcove video
  68         # it also tests brightcove videos that need to set the 'Referer' in the
  69         # http requests
  70         {
  71             'add_ie': ['Brightcove'],
  72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  73             'info_dict': {
  74                 'id': '2765128793001',
  75                 'ext': 'mp4',
  76                 'title': 'Le cours de bourse : l’analyse technique',
  77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  78                 'uploader': 'BFM BUSINESS',
  79             },
  80             'params': {
  81                 'skip_download': True,
  82             },
  83         },
  84         {
  85             # https://github.com/rg3/youtube-dl/issues/2253
  86             'url': 'http://bcove.me/i6nfkrc3',
  87             'md5': '0ba9446db037002366bab3b3eb30c88c',
  88             'info_dict': {
  89                 'id': '3101154703001',
  90                 'ext': 'mp4',
  91                 'title': 'Still no power',
  92                 'uploader': 'thestar.com',
  93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  94             },
  95             'add_ie': ['Brightcove'],
  96         },
  97         {
  98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
  99             'md5': 'fb973ecf6e4a78a67453647444222983',
 100             'info_dict': {
 101                 'id': '3414141473001',
 102                 'ext': 'mp4',
 103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 105                 'uploader': 'Championat',
 106             },
 107         },
 108         {
 109             # https://github.com/rg3/youtube-dl/issues/3541
 110             'add_ie': ['Brightcove'],
 111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 112             'info_dict': {
 113                 'id': '3866516442001',
 114                 'ext': 'mp4',
 115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 117                 'uploader': 'SBS Broadcasting',
 118             },
 119             'skip': 'Restricted to Netherlands',
 120             'params': {
 121                 'skip_download': True,  # m3u8 download
 122             },
 123         },
 124         # Direct link to a video
 125         {
 126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 128             'info_dict': {
 129                 'id': 'trailer',
 130                 'ext': 'mp4',
 131                 'title': 'trailer',
 132                 'upload_date': '20100513',
 133             }
 134         },
 135         # ooyala video
 136         {
 137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 139             'info_dict': {
 140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 141                 'ext': 'mp4',
 142                 'title': '2cc213299525360.mov',  # that's what we get
 143             },
 144             'add_ie': ['Ooyala'],
 145         },
 146         # multiple ooyala embeds on SBN network websites
 147         {
 148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 149             'info_dict': {
 150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 152             },
 153             'playlist_mincount': 3,
 154             'params': {
 155                 'skip_download': True,
 156             },
 157             'add_ie': ['Ooyala'],
 158         },
 159         # google redirect
 160         {
 161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 162             'info_dict': {
 163                 'id': 'cmQHVoWB5FY',
 164                 'ext': 'mp4',
 165                 'upload_date': '20130224',
 166                 'uploader_id': 'TheVerge',
 167                 'description': 're:^Chris Ziegler takes a look at the\.*',
 168                 'uploader': 'The Verge',
 169                 'title': 'First Firefox OS phones side-by-side',
 170             },
 171             'params': {
 172                 'skip_download': False,
 173             }
 174         },
 175         # embed.ly video
 176         {
 177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 178             'info_dict': {
 179                 'id': '9ODmcdjQcHQ',
 180                 'ext': 'mp4',
 181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 182                 'upload_date': '20140225',
 183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 184                 'uploader': 'Tested',
 185                 'uploader_id': 'testedcom',
 186             },
 187             # No need to test YoutubeIE here
 188             'params': {
 189                 'skip_download': True,
 190             },
 191         },
 192         # funnyordie embed
 193         {
 194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 195             'info_dict': {
 196                 'id': '18e820ec3f',
 197                 'ext': 'mp4',
 198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 200             },
 201         },
 202         # BBC iPlayer embeds
 203         {
 204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 205             'info_dict': {
 206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 207             },
 208             'playlist_mincount': 18,
 209         },
 210         # RUTV embed
 211         {
 212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 213             'info_dict': {
 214                 'id': '776940',
 215                 'ext': 'mp4',
 216                 'title': 'Охотское море стало целиком российским',
 217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 218             },
 219             'params': {
 220                 # m3u8 download
 221                 'skip_download': True,
 222             },
 223         },
 224         # Embedded TED video
 225         {
 226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 227             'md5': '65fdff94098e4a607385a60c5177c638',
 228             'info_dict': {
 229                 'id': '1969',
 230                 'ext': 'mp4',
 231                 'title': 'Hidden miracles of the natural world',
 232                 'uploader': 'Louie Schwartzberg',
 233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 234             }
 235         },
        # Embedded Ustream video
 237         {
 238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 239             'md5': '27b99cdb639c9b12a79bca876a073417',
 240             'info_dict': {
 241                 'id': '45734260',
 242                 'ext': 'flv',
 243                 'uploader': 'AU SPA:  The NSA and Privacy',
 244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 245             }
 246         },
 247         # nowvideo embed hidden behind percent encoding
 248         {
 249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 251             'info_dict': {
 252                 'id': '06e53103ca9aa',
 253                 'ext': 'flv',
 254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 255                 'description': 'No description',
 256             },
 257         },
 258         # arte embed
 259         {
 260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 261             'md5': '7653032cbb25bf6c80d80f217055fa43',
 262             'info_dict': {
 263                 'id': '048195-004_PLUS7-F',
 264                 'ext': 'flv',
 265                 'title': 'X:enius',
 266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 267                 'upload_date': '20140320',
 268             },
 269             'params': {
 270                 'skip_download': 'Requires rtmpdump'
 271             }
 272         },
 273         # Condé Nast embed
 274         {
 275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 277             'info_dict': {
 278                 'id': '53501be369702d3275860000',
 279                 'ext': 'mp4',
 280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 281             }
 282         },
 283         # Dailymotion embed
 284         {
 285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 287             'info_dict': {
 288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 289                 'ext': 'mp4',
 290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 291                 'uploader': 'Spi0n',
 292             },
 293             'add_ie': ['Dailymotion'],
 294         },
 295         # YouTube embed
 296         {
 297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 298             'info_dict': {
 299                 'id': 'FXRb4ykk4S0',
 300                 'ext': 'mp4',
 301                 'title': 'The NBL Auction 2014',
 302                 'uploader': 'BADMINTON England',
 303                 'uploader_id': 'BADMINTONEvents',
 304                 'upload_date': '20140603',
 305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 306             },
 307             'add_ie': ['Youtube'],
 308             'params': {
 309                 'skip_download': True,
 310             }
 311         },
        # MTVServices embed
 313         {
 314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 315             'md5': '35727f82f58c76d996fc188f9755b0d5',
 316             'info_dict': {
 317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 318                 'ext': 'mp4',
 319                 'title': 'Review',
 320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 321             },
 322         },
 323         # YouTube embed via <data-embed-url="">
 324         {
 325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 326             'info_dict': {
 327                 'id': '4vAffPZIT44',
 328                 'ext': 'mp4',
 329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 330                 'uploader': 'Gameloft',
 331                 'uploader_id': 'gameloft',
 332                 'upload_date': '20140828',
 333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 334             },
 335             'params': {
 336                 'skip_download': True,
 337             }
 338         },
 339         # Camtasia studio
 340         {
 341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 342             'playlist': [{
 343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 344                 'info_dict': {
 345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 347                     'ext': 'flv',
 348                     'duration': 2235.90,
 349                 }
 350             }, {
 351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 352                 'info_dict': {
 353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 355                     'ext': 'flv',
 356                     'duration': 2235.93,
 357                 }
 358             }],
 359             'info_dict': {
 360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 361             }
 362         },
 363         # Flowplayer
 364         {
 365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 366             'md5': '9d65602bf31c6e20014319c7d07fba27',
 367             'info_dict': {
 368                 'id': '5123ea6d5e5a7',
 369                 'ext': 'mp4',
 370                 'age_limit': 18,
 371                 'uploader': 'www.handjobhub.com',
 372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 373             }
 374         },
 375         # RSS feed
 376         {
 377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 378             'info_dict': {
 379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 380                 'title': 'Zero Punctuation',
 381                 'description': 're:.*groundbreaking video review series.*'
 382             },
 383             'playlist_mincount': 11,
 384         },
 385         # Multiple brightcove videos
 386         # https://github.com/rg3/youtube-dl/issues/2283
 387         {
 388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 389             'info_dict': {
 390                 'id': 'always-never',
 391                 'title': 'Always / Never - The New Yorker',
 392             },
 393             'playlist_count': 3,
 394             'params': {
 395                 'extract_flat': False,
 396                 'skip_download': True,
 397             }
 398         },
 399         # MLB embed
 400         {
 401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 402             'md5': '96f09a37e44da40dd083e12d9a683327',
 403             'info_dict': {
 404                 'id': '33322633',
 405                 'ext': 'mp4',
 406                 'title': 'Ump changes call to ball',
 407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 408                 'duration': 48,
 409                 'timestamp': 1401537900,
 410                 'upload_date': '20140531',
 411                 'thumbnail': 're:^https?://.*\.jpg$',
 412             },
 413         },
 414         # Wistia embed
 415         {
 416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 417             'md5': '8788b683c777a5cf25621eaf286d0c23',
 418             'info_dict': {
 419                 'id': '1cfaf6b7ea',
 420                 'ext': 'mov',
 421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 422                 'duration': 643.0,
 423                 'filesize': 182808282,
 424                 'uploader': 'education-portal.com',
 425             },
 426         },
 427         {
 428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 430             'info_dict': {
 431                 'id': 'uxjb0lwrcz',
 432                 'ext': 'mp4',
 433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 434                 'duration': 1715.0,
 435                 'uploader': 'thoughtworks.wistia.com',
 436             },
 437         },
 438         # Direct download with broken HEAD
 439         {
 440             'url': 'http://ai-radio.org:8000/radio.opus',
 441             'info_dict': {
 442                 'id': 'radio',
 443                 'ext': 'opus',
 444                 'title': 'radio',
 445             },
 446             'params': {
 447                 'skip_download': True,  # infinite live stream
 448             },
 449             'expected_warnings': [
 450                 r'501.*Not Implemented'
 451             ],
 452         },
 453         # Soundcloud embed
 454         {
 455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 456             'info_dict': {
 457                 'id': '174391317',
 458                 'ext': 'mp3',
 459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 460                 'uploader': 'Sophos Security',
 461                 'title': 'Chet Chat 171 - Oct 29, 2014',
 462                 'upload_date': '20141029',
 463             }
 464         },
 465         # Livestream embed
 466         {
 467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 468             'info_dict': {
 469                 'id': '67864563',
 470                 'ext': 'flv',
 471                 'upload_date': '20141112',
 472                 'title': 'Rosetta #CometLanding webcast HL 10',
 473             }
 474         },
 475         # LazyYT
 476         {
 477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 478             'info_dict': {
 479                 'id': '1986',
 480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 481             },
 482             'playlist_mincount': 2,
 483         },
 484         # Direct link with incorrect MIME type
 485         {
 486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 488             'info_dict': {
 489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 490                 'id': '5_Lennart_Poettering_-_Systemd',
 491                 'ext': 'webm',
 492                 'title': '5_Lennart_Poettering_-_Systemd',
 493                 'upload_date': '20141120',
 494             },
 495             'expected_warnings': [
 496                 'URL could be a direct video link, returning it as such.'
 497             ]
 498         },
 499         # Cinchcast embed
 500         {
 501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 502             'info_dict': {
 503                 'id': '7141703',
 504                 'ext': 'mp3',
 505                 'upload_date': '20141126',
 506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 507             }
 508         },
 509         # Cinerama player
 510         {
 511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 512             'info_dict': {
 513                 'id': '730m_DandD_1901_512k',
 514                 'ext': 'mp4',
 515                 'uploader': 'www.abc.net.au',
 516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 517             }
 518         },
 519         # embedded viddler video
 520         {
 521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 522             'info_dict': {
 523                 'id': '4d03aad9',
 524                 'ext': 'mp4',
 525                 'uploader': 'deadspin',
 526                 'title': 'WALL-TO-GORTAT',
 527                 'timestamp': 1422285291,
 528                 'upload_date': '20150126',
 529             },
 530             'add_ie': ['Viddler'],
 531         },
 532         # Libsyn embed
 533         {
 534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 535             'info_dict': {
 536                 'id': '3377616',
 537                 'ext': 'mp3',
 538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 540                 'upload_date': '20150220',
 541             },
 542         },
 543         # jwplayer YouTube
 544         {
 545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 546             'info_dict': {
 547                 'id': 'Mrj4DVp2zeA',
 548                 'ext': 'mp4',
 549                 'upload_date': '20150212',
 550                 'uploader': 'The National Archives UK',
 551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 552                 'uploader_id': 'NationalArchives08',
 553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 554             },
 555         },
 556         # rtl.nl embed
 557         {
 558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 559             'playlist_mincount': 5,
 560             'info_dict': {
 561                 'id': 'aanslagen-kopenhagen',
 562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 563             }
 564         },
 565         # Zapiks embed
 566         {
 567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 568             'info_dict': {
 569                 'id': '118046',
 570                 'ext': 'mp4',
 571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 572             }
 573         },
 574         # Kaltura embed
 575         {
 576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 577             'info_dict': {
 578                 'id': '1_eergr3h1',
 579                 'ext': 'mp4',
 580                 'upload_date': '20150226',
 581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 582                 'timestamp': int,
 583                 'title': 'John Carlson Postgame 2/25/15',
 584             },
 585         },
 586         # Eagle.Platform embed (generic URL)
 587         {
 588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 589             'info_dict': {
 590                 'id': '227304',
 591                 'ext': 'mp4',
 592                 'title': 'Навальный вышел на свободу',
 593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 594                 'thumbnail': 're:^https?://.*\.jpg$',
 595                 'duration': 87,
 596                 'view_count': int,
 597                 'age_limit': 0,
 598             },
 599         },
 600         # ClipYou (Eagle.Platform) embed (custom URL)
 601         {
 602             'url': 'http://muz-tv.ru/play/7129/',
 603             'info_dict': {
 604                 'id': '12820',
 605                 'ext': 'mp4',
 606                 'title': "'O Sole Mio",
 607                 'thumbnail': 're:^https?://.*\.jpg$',
 608                 'duration': 216,
 609                 'view_count': int,
 610             },
 611         },
 612         # Pladform embed
 613         {
 614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 615             'info_dict': {
 616                 'id': '100183293',
 617                 'ext': 'mp4',
 618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
 619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 620                 'thumbnail': 're:^https?://.*\.jpg$',
 621                 'duration': 694,
 622                 'age_limit': 0,
 623             },
 624         },
 625         # 5min embed
 626         {
 627             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 628             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 629             'info_dict': {
 630                 'id': '518726732',
 631                 'ext': 'mp4',
 632                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 633             },
 634         },
 635         # RSS feed with enclosure
 636         {
 637             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 638             'info_dict': {
 639                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 640                 'ext': 'm4v',
 641                 'upload_date': '20150228',
 642                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 643             }
 644         },
 645         # Crooks and Liars embed
 646         {
 647             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 648             'info_dict': {
 649                 'id': '8RUoRhRi',
 650                 'ext': 'mp4',
 651                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 652                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 653                 'timestamp': 1428207000,
 654                 'upload_date': '20150405',
 655                 'uploader': 'Heather',
 656             },
 657         },
 658         # Crooks and Liars external embed
 659         {
 660             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 661             'info_dict': {
 662                 'id': 'MTE3MjUtMzQ2MzA',
 663                 'ext': 'mp4',
 664                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 665                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 666                 'timestamp': 1265032391,
 667                 'upload_date': '20100201',
 668                 'uploader': 'Heather',
 669             },
 670         },
 671         # NBC Sports vplayer embed
 672         {
 673             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 674             'info_dict': {
 675                 'id': 'ln7x1qSThw4k',
 676                 'ext': 'flv',
 677                 'title': "PFT Live: New leader in the 'new-look' defense",
 678                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 679             },
 680         },
 681         # UDN embed
 682         {
 683             'url': 'http://www.udn.com/news/story/7314/822787',
 684             'md5': 'de06b4c90b042c128395a88f0384817e',
 685             'info_dict': {
 686                 'id': '300040',
 687                 'ext': 'mp4',
 688                 'title': '生物老師男變女 全校挺"做自己"',
 689                 'thumbnail': 're:^https?://.*\.jpg$',
 690             }
 691         }
 692     ]
 693
 694     def report_following_redirect(self, new_url):
 695         """Report information extraction."""
 696         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 697
 698     def _extract_rss(self, url, video_id, doc):
 699         playlist_title = doc.find('./channel/title').text
 700         playlist_desc_el = doc.find('./channel/description')
 701         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 702
 703         entries = []
 704         for it in doc.findall('./channel/item'):
 705             next_url = xpath_text(it, 'link', fatal=False)
 706             if not next_url:
 707                 enclosure_nodes = it.findall('./enclosure')
 708                 for e in enclosure_nodes:
 709                     next_url = e.attrib.get('url')
 710                     if next_url:
 711                         break
 712
 713             if not next_url:
 714                 continue
 715
 716             entries.append({
 717                 '_type': 'url',
 718                 'url': next_url,
 719                 'title': it.find('title').text,
 720             })
 721
 722         return {
 723             '_type': 'playlist',
 724             'id': url,
 725             'title': playlist_title,
 726             'description': playlist_desc,
 727             'entries': entries,
 728         }
 729
 730     def _extract_camtasia(self, url, video_id, webpage):
 731         """ Returns None if no camtasia video can be found. """
 732
 733         camtasia_cfg = self._search_regex(
 734             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 735             webpage, 'camtasia configuration file', default=None)
 736         if camtasia_cfg is None:
 737             return None
 738
 739         title = self._html_search_meta('DC.title', webpage, fatal=True)
 740
 741         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 742         camtasia_cfg = self._download_xml(
 743             camtasia_url, video_id,
 744             note='Downloading camtasia configuration',
 745             errnote='Failed to download camtasia configuration')
 746         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 747
 748         entries = []
 749         for n in fileset_node.getchildren():
 750             url_n = n.find('./uri')
 751             if url_n is None:
 752                 continue
 753
 754             entries.append({
 755                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 756                 'title': '%s - %s' % (title, n.tag),
 757                 'url': compat_urlparse.urljoin(url, url_n.text),
 758                 'duration': float_or_none(n.find('./duration').text),
 759             })
 760
 761         return {
 762             '_type': 'playlist',
 763             'entries': entries,
 764             'title': title,
 765         }
 766
 767     def _real_extract(self, url):
 768         if url.startswith('//'):
 769             return {
 770                 '_type': 'url',
 771                 'url': self.http_scheme() + url,
 772             }
 773
 774         parsed_url = compat_urlparse.urlparse(url)
 775         if not parsed_url.scheme:
 776             default_search = self._downloader.params.get('default_search')
 777             if default_search is None:
 778                 default_search = 'fixup_error'
 779
 780             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 781                 if '/' in url:
 782                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 783                     return self.url_result('http://' + url)
 784                 elif default_search != 'fixup_error':
 785                     if default_search == 'auto_warning':
 786                         if re.match(r'^(?:url|URL)$', url):
 787                             raise ExtractorError(
 788                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 789                                 expected=True)
 790                         else:
 791                             self._downloader.report_warning(
 792                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 793                     return self.url_result('ytsearch:' + url)
 794
 795             if default_search in ('error', 'fixup_error'):
 796                 raise ExtractorError(
 797                     '%r is not a valid URL. '
 798                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 799                     % (url, url), expected=True)
 800             else:
 801                 if ':' not in default_search:
 802                     default_search += ':'
 803                 return self.url_result(default_search + url)
 804
 805         url, smuggled_data = unsmuggle_url(url)
 806         force_videoid = None
 807         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 808         if smuggled_data and 'force_videoid' in smuggled_data:
 809             force_videoid = smuggled_data['force_videoid']
 810             video_id = force_videoid
 811         else:
 812             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 813
 814         self.to_screen('%s: Requesting header' % video_id)
 815
 816         head_req = HEADRequest(url)
 817         head_response = self._request_webpage(
 818             head_req, video_id,
 819             note=False, errnote='Could not send HEAD request to %s' % url,
 820             fatal=False)
 821
 822         if head_response is not False:
 823             # Check for redirect
 824             new_url = head_response.geturl()
 825             if url != new_url:
 826                 self.report_following_redirect(new_url)
 827                 if force_videoid:
 828                     new_url = smuggle_url(
 829                         new_url, {'force_videoid': force_videoid})
 830                 return self.url_result(new_url)
 831
 832         full_response = None
 833         if head_response is False:
 834             full_response = self._request_webpage(url, video_id)
 835             head_response = full_response
 836
 837         # Check for direct link to a video
 838         content_type = head_response.headers.get('Content-Type', '')
 839         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 840         if m:
 841             upload_date = unified_strdate(
 842                 head_response.headers.get('Last-Modified'))
 843             return {
 844                 'id': video_id,
 845                 'title': os.path.splitext(url_basename(url))[0],
 846                 'direct': True,
 847                 'formats': [{
 848                     'format_id': m.group('format_id'),
 849                     'url': url,
 850                     'vcodec': 'none' if m.group('type') == 'audio' else None
 851                 }],
 852                 'upload_date': upload_date,
 853             }
 854
 855         if not self._downloader.params.get('test', False) and not is_intentional:
 856             self._downloader.report_warning('Falling back on generic information extractor.')
 857
 858         if not full_response:
 859             full_response = self._request_webpage(url, video_id)
 860
 861         # Maybe it's a direct link to a video?
 862         # Be careful not to download the whole thing!
 863         first_bytes = full_response.read(512)
 864         if not is_html(first_bytes):
 865             self._downloader.report_warning(
 866                 'URL could be a direct video link, returning it as such.')
 867             upload_date = unified_strdate(
 868                 head_response.headers.get('Last-Modified'))
 869             return {
 870                 'id': video_id,
 871                 'title': os.path.splitext(url_basename(url))[0],
 872                 'direct': True,
 873                 'url': url,
 874                 'upload_date': upload_date,
 875             }
 876
 877         webpage = self._webpage_read_content(
 878             full_response, url, video_id, prefix=first_bytes)
 879
 880         self.report_extraction(video_id)
 881
 882         # Is it an RSS feed?
 883         try:
 884             doc = parse_xml(webpage)
 885             if doc.tag == 'rss':
 886                 return self._extract_rss(url, video_id, doc)
 887         except compat_xml_parse_error:
 888             pass
 889
 890         # Is it a Camtasia project?
 891         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 892         if camtasia_res is not None:
 893             return camtasia_res
 894
 895         # Sometimes embedded video player is hidden behind percent encoding
 896         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 897         # Unescaping the whole page allows to handle those cases in a generic way
 898         webpage = compat_urllib_parse.unquote(webpage)
 899
 900         # it's tempting to parse this further, but you would
 901         # have to take into account all the variations like
 902         #   Video Title - Site Name
 903         #   Site Name | Video Title
 904         #   Video Title - Tagline | Site Name
 905         # and so on and so forth; it's just not practical
 906         video_title = self._html_search_regex(
 907             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 908             default='video')
 909
 910         # Try to detect age limit automatically
 911         age_limit = self._rta_search(webpage)
 912         # And then there are the jokers who advertise that they use RTA,
 913         # but actually don't.
 914         AGE_LIMIT_MARKERS = [
 915             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 916         ]
 917         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 918             age_limit = 18
 919
 920         # video uploader is domain name
 921         video_uploader = self._search_regex(
 922             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 923
 924         # Helper method
 925         def _playlist_from_matches(matches, getter=None, ie=None):
 926             urlrs = orderedSet(
 927                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 928                 for m in matches)
 929             return self.playlist_result(
 930                 urlrs, playlist_id=video_id, playlist_title=video_title)
 931
 932         # Look for BrightCove:
 933         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 934         if bc_urls:
 935             self.to_screen('Brightcove video detected.')
 936             entries = [{
 937                 '_type': 'url',
 938                 'url': smuggle_url(bc_url, {'Referer': url}),
 939                 'ie_key': 'Brightcove'
 940             } for bc_url in bc_urls]
 941
 942             return {
 943                 '_type': 'playlist',
 944                 'title': video_title,
 945                 'id': video_id,
 946                 'entries': entries,
 947             }
 948
 949         # Look for embedded rtl.nl player
 950         matches = re.findall(
 951             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
 952             webpage)
 953         if matches:
 954             return _playlist_from_matches(matches, ie='RtlNl')
 955
 956         # Look for embedded (iframe) Vimeo player
 957         mobj = re.search(
 958             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
 959         if mobj:
 960             player_url = unescapeHTML(mobj.group('url'))
 961             surl = smuggle_url(player_url, {'Referer': url})
 962             return self.url_result(surl)
 963         # Look for embedded (swf embed) Vimeo player
 964         mobj = re.search(
 965             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
 966         if mobj:
 967             return self.url_result(mobj.group(1))
 968
 969         # Look for embedded YouTube player
 970         matches = re.findall(r'''(?x)
 971             (?:
 972                 <iframe[^>]+?src=|
 973                 data-video-url=|
 974                 <embed[^>]+?src=|
 975                 embedSWF\(?:\s*|
 976                 new\s+SWFObject\(
 977             )
 978             (["\'])
 979                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
 980                 (?:embed|v|p)/.+?)
 981             \1''', webpage)
 982         if matches:
 983             return _playlist_from_matches(
 984                 matches, lambda m: unescapeHTML(m[1]))
 985
 986         # Look for lazyYT YouTube embed
 987         matches = re.findall(
 988             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 989         if matches:
 990             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
 991
 992         # Look for embedded Dailymotion player
 993         matches = re.findall(
 994             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
 995         if matches:
 996             return _playlist_from_matches(
 997                 matches, lambda m: unescapeHTML(m[1]))
 998
 999         # Look for embedded Dailymotion playlist player (#3822)
1000         m = re.search(
1001             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1002         if m:
1003             playlists = re.findall(
1004                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1005             if playlists:
1006                 return _playlist_from_matches(
1007                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1008
1009         # Look for embedded Wistia player
1010         match = re.search(
1011             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1012         if match:
1013             embed_url = self._proto_relative_url(
1014                 unescapeHTML(match.group('url')))
1015             return {
1016                 '_type': 'url_transparent',
1017                 'url': embed_url,
1018                 'ie_key': 'Wistia',
1019                 'uploader': video_uploader,
1020                 'title': video_title,
1021                 'id': video_id,
1022             }
1023
1024         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1025         if match:
1026             return {
1027                 '_type': 'url_transparent',
1028                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1029                 'ie_key': 'Wistia',
1030                 'uploader': video_uploader,
1031                 'title': video_title,
1032                 'id': match.group('id')
1033             }
1034
1035         # Look for embedded blip.tv player
1036         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1037         if mobj:
1038             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1039         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1040         if mobj:
1041             return self.url_result(mobj.group(1), 'BlipTV')
1042
1043         # Look for embedded condenast player
1044         matches = re.findall(
1045             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1046             webpage)
1047         if matches:
1048             return {
1049                 '_type': 'playlist',
1050                 'entries': [{
1051                     '_type': 'url',
1052                     'ie_key': 'CondeNast',
1053                     'url': ma,
1054                 } for ma in matches],
1055                 'title': video_title,
1056                 'id': video_id,
1057             }
1058
1059         # Look for Bandcamp pages with custom domain
1060         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1061         if mobj is not None:
1062             burl = unescapeHTML(mobj.group(1))
1063             # Don't set the extractor because it can be a track url or an album
1064             return self.url_result(burl)
1065
1066         # Look for embedded Vevo player
1067         mobj = re.search(
1068             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1069         if mobj is not None:
1070             return self.url_result(mobj.group('url'))
1071
1072         # Look for embedded Viddler player
1073         mobj = re.search(
1074             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1075             webpage)
1076         if mobj is not None:
1077             return self.url_result(mobj.group('url'))
1078
1079         # Look for NYTimes player
1080         mobj = re.search(
1081             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1082             webpage)
1083         if mobj is not None:
1084             return self.url_result(mobj.group('url'))
1085
1086         # Look for Libsyn player
1087         mobj = re.search(
1088             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1089         if mobj is not None:
1090             return self.url_result(mobj.group('url'))
1091
1092         # Look for Ooyala videos
1093         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1094                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1095                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
1096         if mobj is not None:
1097             return OoyalaIE._build_url_result(mobj.group('ec'))
1098
1099         # Look for multiple Ooyala embeds on SBN network websites
1100         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1101         if mobj is not None:
1102             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1103             if embeds:
1104                 return _playlist_from_matches(
1105                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1106
1107         # Look for Aparat videos
1108         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1109         if mobj is not None:
1110             return self.url_result(mobj.group(1), 'Aparat')
1111
1112         # Look for MPORA videos
1113         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1114         if mobj is not None:
1115             return self.url_result(mobj.group(1), 'Mpora')
1116
1117         # Look for embedded NovaMov-based player
1118         mobj = re.search(
1119             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1120                     (?P<url>http://(?:(?:embed|www)\.)?
1121                         (?:novamov\.com|
1122                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1123                            videoweed\.(?:es|com)|
1124                            movshare\.(?:net|sx|ag)|
1125                            divxstage\.(?:eu|net|ch|co|at|ag))
1126                         /embed\.php.+?)\1''', webpage)
1127         if mobj is not None:
1128             return self.url_result(mobj.group('url'))
1129
1130         # Look for embedded Facebook player
1131         mobj = re.search(
1132             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1133         if mobj is not None:
1134             return self.url_result(mobj.group('url'), 'Facebook')
1135
1136         # Look for embedded VK player
1137         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1138         if mobj is not None:
1139             return self.url_result(mobj.group('url'), 'VK')
1140
1141         # Look for embedded ivi player
1142         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1143         if mobj is not None:
1144             return self.url_result(mobj.group('url'), 'Ivi')
1145
1146         # Look for embedded Huffington Post player
1147         mobj = re.search(
1148             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1149         if mobj is not None:
1150             return self.url_result(mobj.group('url'), 'HuffPost')
1151
1152         # Look for embed.ly
1153         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1154         if mobj is not None:
1155             return self.url_result(mobj.group('url'))
1156         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1157         if mobj is not None:
1158             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1159
1160         # Look for funnyordie embed
1161         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1162         if matches:
1163             return _playlist_from_matches(
1164                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1165
1166         # Look for BBC iPlayer embed
1167         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1168         if matches:
1169             return _playlist_from_matches(matches, ie='BBCCoUk')
1170
1171         # Look for embedded RUTV player
1172         rutv_url = RUTVIE._extract_url(webpage)
1173         if rutv_url:
1174             return self.url_result(rutv_url, 'RUTV')
1175
1176         # Look for embedded TED player
1177         mobj = re.search(
1178             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1179         if mobj is not None:
1180             return self.url_result(mobj.group('url'), 'TED')
1181
1182         # Look for embedded Ustream videos
1183         mobj = re.search(
1184             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1185         if mobj is not None:
1186             return self.url_result(mobj.group('url'), 'Ustream')
1187
1188         # Look for embedded arte.tv player
1189         mobj = re.search(
1190             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1191             webpage)
1192         if mobj is not None:
1193             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1194
1195         # Look for embedded smotri.com player
1196         smotri_url = SmotriIE._extract_url(webpage)
1197         if smotri_url:
1198             return self.url_result(smotri_url, 'Smotri')
1199
1200         # Look for embeded soundcloud player
1201         mobj = re.search(
1202             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1203             webpage)
1204         if mobj is not None:
1205             url = unescapeHTML(mobj.group('url'))
1206             return self.url_result(url)
1207
1208         # Look for embedded vulture.com player
1209         mobj = re.search(
1210             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1211             webpage)
1212         if mobj is not None:
1213             url = unescapeHTML(mobj.group('url'))
1214             return self.url_result(url, ie='Vulture')
1215
1216         # Look for embedded mtvservices player
1217         mobj = re.search(
1218             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1219             webpage)
1220         if mobj is not None:
1221             url = unescapeHTML(mobj.group('url'))
1222             return self.url_result(url, ie='MTVServicesEmbedded')
1223
1224         # Look for embedded yahoo player
1225         mobj = re.search(
1226             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1227             webpage)
1228         if mobj is not None:
1229             return self.url_result(mobj.group('url'), 'Yahoo')
1230
1231         # Look for embedded sbs.com.au player
1232         mobj = re.search(
1233             r'''(?x)
1234             (?:
1235                 <meta\s+property="og:video"\s+content=|
1236                 <iframe[^>]+?src=
1237             )
1238             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1239             webpage)
1240         if mobj is not None:
1241             return self.url_result(mobj.group('url'), 'SBS')
1242
1243         # Look for embedded Cinchcast player
1244         mobj = re.search(
1245             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1246             webpage)
1247         if mobj is not None:
1248             return self.url_result(mobj.group('url'), 'Cinchcast')
1249
1250         mobj = re.search(
1251             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1252             webpage)
1253         if mobj is not None:
1254             return self.url_result(mobj.group('url'), 'MLB')
1255
1256         mobj = re.search(
1257             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1258             webpage)
1259         if mobj is not None:
1260             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1261
1262         mobj = re.search(
1263             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1264             webpage)
1265         if mobj is not None:
1266             return self.url_result(mobj.group('url'), 'Livestream')
1267
1268         # Look for Zapiks embed
1269         mobj = re.search(
1270             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1271         if mobj is not None:
1272             return self.url_result(mobj.group('url'), 'Zapiks')
1273
1274         # Look for Kaltura embeds
1275         mobj = re.search(
1276             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1277         if mobj is not None:
1278             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1279
1280         # Look for Eagle.Platform embeds
1281         mobj = re.search(
1282             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1283         if mobj is not None:
1284             return self.url_result(mobj.group('url'), 'EaglePlatform')
1285
1286         # Look for ClipYou (uses Eagle.Platform) embeds
1287         mobj = re.search(
1288             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1289         if mobj is not None:
1290             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1291
1292         # Look for Pladform embeds
1293         mobj = re.search(
1294             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1295         if mobj is not None:
1296             return self.url_result(mobj.group('url'), 'Pladform')
1297
1298         # Look for 5min embeds
1299         mobj = re.search(
1300             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1301         if mobj is not None:
1302             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1303
1304         # Look for Crooks and Liars embeds
1305         mobj = re.search(
1306             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1307         if mobj is not None:
1308             return self.url_result(mobj.group('url'))
1309
1310         # Look for NBC Sports VPlayer embeds
1311         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1312         if nbc_sports_url:
1313             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1314
1315         # Look for UDN embeds
1316         mobj = re.search(
1317             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1318         if mobj is not None:
1319             return self.url_result(
1320                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1321
1322         def check_video(vurl):
1323             if YoutubeIE.suitable(vurl):
1324                 return True
1325             vpath = compat_urlparse.urlparse(vurl).path
1326             vext = determine_ext(vpath)
1327             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1328
1329         def filter_video(urls):
1330             return list(filter(check_video, urls))
1331
1332         # Start with something easy: JW Player in SWFObject
1333         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1334         if not found:
1335             # Look for gorilla-vid style embedding
1336             found = filter_video(re.findall(r'''(?sx)
1337                 (?:
1338                     jw_plugins|
1339                     JWPlayerOptions|
1340                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1341                 )
1342                 .*?
1343                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1344         if not found:
1345             # Broaden the search a little bit
1346             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1347         if not found:
1348             # Broaden the findall a little bit: JWPlayer JS loader
1349             found = filter_video(re.findall(
1350                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1351         if not found:
1352             # Flow player
1353             found = filter_video(re.findall(r'''(?xs)
1354                 flowplayer\("[^"]+",\s*
1355                     \{[^}]+?\}\s*,
1356                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1357                         ["']?url["']?\s*:\s*["']([^"']+)["']
1358             ''', webpage))
1359         if not found:
1360             # Cinerama player
1361             found = re.findall(
1362                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1363         if not found:
1364             # Try to find twitter cards info
1365             found = filter_video(re.findall(
1366                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1367         if not found:
1368             # We look for Open Graph info:
1369             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1370             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1371             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1372             if m_video_type is not None:
1373                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1374         if not found:
1375             # HTML5 video
1376             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1377         if not found:
1378             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1379             found = re.search(
1380                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1381                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1382                 webpage)
1383             if not found:
1384                 # Look also in Refresh HTTP header
1385                 refresh_header = head_response.headers.get('Refresh')
1386                 if refresh_header:
1387                     found = re.search(REDIRECT_REGEX, refresh_header)
1388             if found:
1389                 new_url = found.group(1)
1390                 self.report_following_redirect(new_url)
1391                 return {
1392                     '_type': 'url',
1393                     'url': new_url,
1394                 }
1395         if not found:
1396             raise UnsupportedError(url)
1397
1398         entries = []
1399         for video_url in found:
1400             video_url = compat_urlparse.urljoin(url, video_url)
1401             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1402
1403             # Sometimes, jwplayer extraction will result in a YouTube URL
1404             if YoutubeIE.suitable(video_url):
1405                 entries.append(self.url_result(video_url, 'Youtube'))
1406                 continue
1407
1408             # here's a fun little line of code for you:
1409             video_id = os.path.splitext(video_id)[0]
1410
1411             entries.append({
1412                 'id': video_id,
1413                 'url': video_url,
1414                 'uploader': video_uploader,
1415                 'title': video_title,
1416                 'age_limit': age_limit,
1417             })
1418
1419         if len(entries) == 1:
1420             return entries[0]
1421         else:
1422             for num, e in enumerate(entries, start=1):
1423                 # 'url' results don't have a title
1424                 if e.get('title') is not None:
1425                     e['title'] = '%s (%d)' % (e['title'], num)
1426             return {
1427                 '_type': 'playlist',
1428                 'entries': entries,
1429             }