3 from __future__ import unicode_literals
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12 compat_etree_fromstring,
14 compat_urllib_parse_unquote,
16 compat_xml_parse_error,
36 from .commonprotocols import RtmpIE
37 from .brightcove import (
45 from .nbc import NBCSportsVPlayerIE
46 from .ooyala import OoyalaIE
47 from .rutv import RUTVIE
48 from .tvc import TVCIE
49 from .sportbox import SportBoxEmbedIE
50 from .smotri import SmotriIE
51 from .myvi import MyviIE
52 from .condenast import CondeNastIE
53 from .udn import UDNEmbedIE
54 from .senateisvp import SenateISVPIE
55 from .svt import SVTIE
56 from .pornhub import PornHubIE
57 from .xhamster import XHamsterEmbedIE
58 from .tnaflix import TNAFlixNetworkEmbedIE
59 from .drtuber import DrTuberIE
60 from .redtube import RedTubeIE
61 from .vimeo import VimeoIE
62 from .dailymotion import (
66 from .dailymail import DailyMailIE
67 from .onionstudios import OnionStudiosIE
68 from .viewlift import ViewLiftEmbedIE
69 from .mtv import MTVServicesEmbeddedIE
70 from .pladform import PladformIE
71 from .videomore import VideomoreIE
72 from .webcaster import WebcasterFeedIE
73 from .googledrive import GoogleDriveIE
74 from .jwplatform import JWPlatformIE
75 from .digiteka import DigitekaIE
76 from .arkena import ArkenaIE
77 from .instagram import InstagramIE
78 from .liveleak import LiveLeakIE
79 from .threeqsdn import ThreeQSDNIE
80 from .theplatform import ThePlatformIE
81 from .vessel import VesselIE
82 from .kaltura import KalturaIE
83 from .eagleplatform import EaglePlatformIE
84 from .facebook import FacebookIE
85 from .soundcloud import SoundcloudIE
86 from .tunein import TuneInBaseIE
87 from .vbox7 import Vbox7IE
88 from .dbtv import DBTVIE
89 from .piksel import PikselIE
90 from .videa import VideaIE
91 from .twentymin import TwentyMinutenIE
92 from .ustream import UstreamIE
93 from .openload import OpenloadIE
94 from .videopress import VideoPressIE
95 from .rutube import RutubeIE
96 from .limelight import LimelightBaseIE
97 from .anvato import AnvatoIE
98 from .washingtonpost import WashingtonPostIE
99 from .wistia import WistiaIE
100 from .mediaset import MediasetIE
101 from .joj import JojIE
102 from .megaphone import MegaphoneIE
103 from .vzaar import VzaarIE
104 from .channel9 import Channel9IE
105 from .vshare import VShareIE
108 class GenericIE(InfoExtractor):
109 IE_DESC = 'Generic downloader that works on some sites'
113 # Direct link to a video
115 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
116 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
121 'upload_date': '20100513',
124 # Direct link to media delivered compressed (until Accept-Encoding is *)
126 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
127 'md5': '128c42e68b13950268b648275386fc74',
129 'id': 'FictionJunction-Parallel_Hearts',
131 'title': 'FictionJunction-Parallel_Hearts',
132 'upload_date': '20140522',
134 'expected_warnings': [
135 'URL could be a direct video link, returning it as such.'
137 'skip': 'URL invalid',
139 # Direct download with broken HEAD
141 'url': 'http://ai-radio.org:8000/radio.opus',
148 'skip_download': True, # infinite live stream
150 'expected_warnings': [
151 r'501.*Not Implemented',
155 # Direct link with incorrect MIME type
157 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
158 'md5': '4ccbebe5f36706d85221f204d7eb5913',
160 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
161 'id': '5_Lennart_Poettering_-_Systemd',
163 'title': '5_Lennart_Poettering_-_Systemd',
164 'upload_date': '20141120',
166 'expected_warnings': [
167 'URL could be a direct video link, returning it as such.'
172 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
174 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
175 'title': 'Zero Punctuation',
176 'description': 're:.*groundbreaking video review series.*'
178 'playlist_mincount': 11,
180 # RSS feed with enclosure
182 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
184 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
186 'upload_date': '20150228',
187 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
190 # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
192 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
196 'title': 'Automatics, robotics and biocybernetics',
197 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
198 'upload_date': '20130627',
199 'formats': 'mincount:16',
200 'subtitles': 'mincount:1',
203 'force_generic_extractor': True,
204 'skip_download': True,
207 # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
209 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
214 'formats': 'mincount:1',
217 'skip_download': True,
220 # SMIL from https://www.restudy.dk/video/play/id/1637
222 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
226 'title': 'video_1637',
227 'formats': 'mincount:3',
230 'skip_download': True,
233 # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
235 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
237 'id': 'smil-service',
239 'title': 'smil-service',
240 'formats': 'mincount:1',
243 'skip_download': True,
246 # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
248 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
252 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
253 'formats': 'mincount:3',
256 'skip_download': True,
259 # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
261 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
263 'id': 'mZlp2ctYIUEB',
265 'title': 'Tikibad ontruimd wegens brand',
266 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
267 'thumbnail': r're:^https?://.*\.jpg$',
271 'skip_download': True,
274 # MPD from http://dash-mse-test.appspot.com/media.html
276 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
277 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
279 'id': 'car-20120827-manifest',
281 'title': 'car-20120827-manifest',
282 'formats': 'mincount:9',
283 'upload_date': '20130904',
286 'format': 'bestvideo',
289 # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
291 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
296 'formats': 'mincount:8',
300 'skip_download': True,
302 'skip': 'video gone',
304 # m3u8 served with Content-Type: text/plain
306 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
311 'upload_date': '20140720',
312 'formats': 'mincount:11',
316 'skip_download': True,
318 'skip': 'video gone',
322 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
326 'upload_date': '20130224',
327 'uploader_id': 'TheVerge',
328 'description': r're:^Chris Ziegler takes a look at the\.*',
329 'uploader': 'The Verge',
330 'title': 'First Firefox OS phones side-by-side',
333 'skip_download': False,
337 # redirect in Refresh HTTP header
338 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
342 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
343 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
344 'upload_date': '20150917',
345 'uploader_id': 'brtvofficial',
346 'uploader': 'Boiler Room',
349 'skip_download': False,
353 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
354 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
356 'id': '13601338388002',
358 'uploader': 'www.hodiho.fr',
359 'title': 'R\u00e9gis plante sa Jeep',
362 # bandcamp page with custom domain
364 'add_ie': ['Bandcamp'],
365 'url': 'http://bronyrock.com/track/the-pony-mash',
369 'title': 'The Pony Mash',
370 'uploader': 'M_Pallante',
372 'skip': 'There is a limit of 200 free downloads / month for the test song',
375 # embedded brightcove video
376 # it also tests brightcove videos that need to set the 'Referer'
377 # in the http requests
378 'add_ie': ['BrightcoveLegacy'],
379 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
381 'id': '2765128793001',
383 'title': 'Le cours de bourse : l’analyse technique',
384 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
385 'uploader': 'BFM BUSINESS',
388 'skip_download': True,
392 # embedded with itemprop embedURL and video id spelled as `idVideo`
393 'add_id': ['BrightcoveLegacy'],
394 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
396 'id': '5255628253001',
398 'title': 'md5:37c519b1128915607601e75a87995fc0',
399 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
400 'uploader': 'BFM BUSINESS',
401 'uploader_id': '876450612001',
402 'timestamp': 1482255315,
403 'upload_date': '20161220',
406 'skip_download': True,
410 # https://github.com/rg3/youtube-dl/issues/2253
411 'url': 'http://bcove.me/i6nfkrc3',
412 'md5': '0ba9446db037002366bab3b3eb30c88c',
414 'id': '3101154703001',
416 'title': 'Still no power',
417 'uploader': 'thestar.com',
418 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
420 'add_ie': ['BrightcoveLegacy'],
421 'skip': 'video gone',
424 'url': 'http://www.championat.com/video/football/v/87/87499.html',
425 'md5': 'fb973ecf6e4a78a67453647444222983',
427 'id': '3414141473001',
429 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
430 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
431 'uploader': 'Championat',
435 # https://github.com/rg3/youtube-dl/issues/3541
436 'add_ie': ['BrightcoveLegacy'],
437 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
439 'id': '3866516442001',
441 'title': 'Leer mij vrouwen kennen: Aflevering 1',
442 'description': 'Leer mij vrouwen kennen: Aflevering 1',
443 'uploader': 'SBS Broadcasting',
445 'skip': 'Restricted to Netherlands',
447 'skip_download': True, # m3u8 download
451 # Brightcove video in <iframe>
452 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
453 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
455 'id': '5360463607001',
457 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
458 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
459 'uploader': 'United Nations',
460 'uploader_id': '1362235914001',
461 'timestamp': 1489593889,
462 'upload_date': '20170315',
464 'add_ie': ['BrightcoveLegacy'],
467 # Brightcove with alternative playerID key
468 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
470 'id': 'nmeth.2062_SV1',
471 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
475 'id': '2228375078001',
477 'title': 'nmeth.2062-sv1',
478 'description': 'nmeth.2062-sv1',
479 'timestamp': 1363357591,
480 'upload_date': '20130315',
481 'uploader': 'Nature Publishing Group',
482 'uploader_id': '1964492299001',
487 # Brightcove with UUID in videoPlayer
488 'url': 'http://www8.hp.com/cn/zh/home.html',
490 'id': '5255815316001',
492 'title': 'Sprocket Video - China',
493 'description': 'Sprocket Video - China',
494 'uploader': 'HP-Video Gallery',
495 'timestamp': 1482263210,
496 'upload_date': '20161220',
497 'uploader_id': '1107601872001',
500 'skip_download': True, # m3u8 download
502 'skip': 'video rotates...weekly?',
505 # Brightcove:new type [2].
506 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
507 'md5': '2b35148fcf48da41c9fb4591650784f3',
509 'id': '5348741021001',
511 'upload_date': '20170306',
512 'uploader_id': '4191638492001',
513 'timestamp': 1488769918,
514 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
519 # Alternative brightcove <video> attributes
520 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
522 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
523 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
526 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
528 'id': '5311302538001',
530 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
531 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
532 'timestamp': 1486321708,
533 'upload_date': '20170205',
534 'uploader_id': '800000640001',
536 'only_matching': True,
540 # Brightcove with UUID in videoPlayer
541 'url': 'http://www8.hp.com/cn/zh/home.html',
543 'id': '5255815316001',
545 'title': 'Sprocket Video - China',
546 'description': 'Sprocket Video - China',
547 'uploader': 'HP-Video Gallery',
548 'timestamp': 1482263210,
549 'upload_date': '20161220',
550 'uploader_id': '1107601872001',
553 'skip_download': True, # m3u8 download
558 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
559 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
561 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
563 'title': '2cc213299525360.mov', # that's what we get
566 'add_ie': ['Ooyala'],
569 # ooyala video embedded with http://player.ooyala.com/iframe.js
570 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
572 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
574 'title': '"Steve Jobs: Man in the Machine" trailer',
575 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
579 'skip_download': True,
581 'skip': 'movie expired',
583 # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
585 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
587 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
589 'title': 'Steampunk Fest Comes to Honesdale',
593 'skip_download': True,
598 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
602 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
603 'upload_date': '20140225',
604 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
605 'uploader': 'Tested',
606 'uploader_id': 'testedcom',
608 # No need to test YoutubeIE here
610 'skip_download': True,
615 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
619 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
620 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
622 # HEAD requests lead to endless 301, while GET is OK
623 'expected_warnings': ['301'],
627 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
631 'title': 'Охотское море стало целиком российским',
632 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
636 'skip_download': True,
641 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
645 'title': 'Дошкольное воспитание',
650 'url': 'http://www.vestifinance.ru/articles/25753',
653 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
658 'title': 'Госзаказ. День 3',
664 'title': 'Госзаказ. День 2',
670 'title': 'Госзаказ. День 1',
676 'skip_download': True,
681 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
683 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
685 'title': 'Ужастики, русский трейлер (2015)',
686 'thumbnail': r're:^https?://.*\.jpg$',
692 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
695 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
697 'playlist_mincount': 7,
698 # This forum does not allow <iframe> syntaxes anymore
699 # Now HTML tags are displayed as-is
700 'skip': 'No videos on this page',
704 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
705 'md5': '65fdff94098e4a607385a60c5177c638',
709 'title': 'Hidden miracles of the natural world',
710 'uploader': 'Louie Schwartzberg',
711 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
714 # nowvideo embed hidden behind percent encoding
716 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
717 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
719 'id': '06e53103ca9aa',
721 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
722 'description': 'No description',
727 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
728 'md5': '7653032cbb25bf6c80d80f217055fa43',
730 'id': '048195-004_PLUS7-F',
733 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
734 'upload_date': '20140320',
737 'skip_download': 'Requires rtmpdump'
739 'skip': 'video gone',
743 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
747 'title': 'Alcaline, le concert avec Calogero',
748 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
749 'upload_date': '20150226',
750 'timestamp': 1424989860,
755 'skip_download': True,
757 'expected_warnings': [
763 'url': 'http://www.wired.com/2014/04/honda-asimo/',
764 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
766 'id': '53501be369702d3275860000',
768 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
773 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
774 'md5': '441aeeb82eb72c422c7f14ec533999cd',
776 'id': 'k2mm4bCdJ6CQ2i7c8o2',
778 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
779 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
781 'uploader_id': 'xgditw',
782 'upload_date': '20140425',
783 'timestamp': 1398441542,
785 'add_ie': ['Dailymotion'],
789 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
793 'title': 'Care worker punches elderly dementia patient in head 11 times',
794 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
796 'add_ie': ['DailyMail'],
798 'skip_download': True,
803 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
807 'title': 'The NBL Auction 2014',
808 'uploader': 'BADMINTON England',
809 'uploader_id': 'BADMINTONEvents',
810 'upload_date': '20140603',
811 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
813 'add_ie': ['Youtube'],
815 'skip_download': True,
820 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
821 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
823 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
825 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
826 'description': 'Two valets share their love for movie star Liam Neesons.',
827 'timestamp': 1349922600,
828 'upload_date': '20121011',
831 # YouTube embed via <data-embed-url="">
833 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
837 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
838 'uploader': 'Gameloft',
839 'uploader_id': 'gameloft',
840 'upload_date': '20140828',
841 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
844 'skip_download': True,
847 # YouTube <object> embed
849 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
850 'md5': '516718101ec834f74318df76259fb3cc',
854 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
855 'upload_date': '20080526',
856 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
857 'uploader': 'Christopher Sykes',
858 'uploader_id': 'ChristopherJSykes',
860 'add_ie': ['Youtube'],
864 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
866 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
868 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
869 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
874 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
876 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
877 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
883 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
888 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
889 'md5': '9d65602bf31c6e20014319c7d07fba27',
891 'id': '5123ea6d5e5a7',
894 'uploader': 'www.handjobhub.com',
895 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
898 # Multiple brightcove videos
899 # https://github.com/rg3/youtube-dl/issues/2283
901 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
903 'id': 'always-never',
904 'title': 'Always / Never - The New Yorker',
908 'extract_flat': False,
909 'skip_download': True,
914 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
915 'md5': '96f09a37e44da40dd083e12d9a683327',
919 'title': 'Ump changes call to ball',
920 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
922 'timestamp': 1401537900,
923 'upload_date': '20140531',
924 'thumbnail': r're:^https?://.*\.jpg$',
929 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
930 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
934 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
935 'description': 'a Paywall Videos video from Remilon',
937 'uploader': 'study.com',
938 'timestamp': 1459678540,
939 'upload_date': '20160403',
940 'filesize': 24687186,
944 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
945 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
949 'title': 'Conversation about Hexagonal Rails Part 1',
950 'description': 'a Martin Fowler video from ThoughtWorks',
952 'uploader': 'thoughtworks.wistia.com',
953 'timestamp': 1401832161,
954 'upload_date': '20140603',
957 # Wistia standard embed (async)
959 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
963 'title': 'Drip Brennan Dunn Workshop',
964 'description': 'a JV Webinars video from getdrip-1',
966 'timestamp': 1463607249,
967 'upload_date': '20160518',
970 'skip_download': True,
975 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
979 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
980 'uploader': 'Sophos Security',
981 'title': 'Chet Chat 171 - Oct 29, 2014',
982 'upload_date': '20141029',
985 # Soundcloud multiple embeds
987 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
990 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
992 'playlist_mincount': 7,
994 # TuneIn station embed
996 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1001 'location': 'Paris, France',
1006 'skip_download': True,
1011 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1015 'upload_date': '20141112',
1016 'title': 'Rosetta #CometLanding webcast HL 10',
1019 # Another Livestream embed, without 'new.' in URL
1021 'url': 'https://www.freespeech.org/',
1025 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1029 'skip_download': True,
1034 'url': 'https://skiplagged.com/',
1037 'title': 'Skiplagged: The smart way to find cheap flights',
1039 'playlist_mincount': 1,
1040 'add_ie': ['Youtube'],
1044 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1048 'upload_date': '20141126',
1049 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1054 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1056 'id': '730m_DandD_1901_512k',
1058 'uploader': 'www.abc.net.au',
1059 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1062 # embedded viddler video
1064 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1068 'uploader': 'deadspin',
1069 'title': 'WALL-TO-GORTAT',
1070 'timestamp': 1422285291,
1071 'upload_date': '20150126',
1073 'add_ie': ['Viddler'],
1077 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1081 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1082 'description': 'md5:601cb790edd05908957dae8aaa866465',
1083 'upload_date': '20150220',
1085 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
1089 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1091 'id': 'Mrj4DVp2zeA',
1093 'upload_date': '20150212',
1094 'uploader': 'The National Archives UK',
1095 'description': 'md5:8078af856dca76edc42910b61273dbbf',
1096 'uploader_id': 'NationalArchives08',
1097 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1102 'url': 'http://www.suffolk.edu/sjc/',
1106 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1107 'uploader': 'www.suffolk.edu',
1110 'skip_download': True,
1112 'skip': 'does not contain a video anymore',
1116 'url': 'http://www.indiedb.com/games/king-machine/videos',
1120 'title': 'king machine trailer 1',
1121 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
1122 'thumbnail': r're:^https?://.*\.jpg$',
1126 # JWPlayer config passed as variable
1127 'url': 'http://www.txxx.com/videos/3326530/ariele/',
1131 'title': 'ARIELE | Tube Cup',
1132 'uploader': 'www.txxx.com',
1136 'skip_download': True,
1141 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
1142 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
1146 'upload_date': '20171122',
1147 'timestamp': 1511366290,
1148 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
1152 # Video.js embed, multiple formats
1153 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1155 'id': 'yygqldloqIk',
1157 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1158 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1159 'upload_date': '20130314',
1160 'uploader': 'PROстое3D',
1161 'uploader_id': 'PROstoe3D',
1164 'skip_download': True,
1168 # Video.js embed, single format
1169 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1173 'title': 'Step 1 - Good Foundation',
1174 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1177 'skip_download': True,
1182 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1183 'playlist_mincount': 5,
1185 'id': 'aanslagen-kopenhagen',
1186 'title': 'Aanslagen Kopenhagen',
1191 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1195 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1198 # Kaltura embed (different embed code)
1200 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1204 'upload_date': '20150127',
1205 'uploader_id': 'PremierMedia',
1207 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1210 # Kaltura embed with single quotes
1212 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1217 'timestamp': 1355743100,
1218 'upload_date': '20121217',
1219 'uploader_id': 'batchUser',
1221 'add_ie': ['Kaltura'],
1224 # Kaltura embedded via quoted entry_id
1225 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1229 'title': '06_matthew_brender_raj_dutt',
1230 'timestamp': 1466638791,
1231 'upload_date': '20160622',
1233 'add_ie': ['Kaltura'],
1234 'expected_warnings': [
1235 'Could not send HEAD request'
1238 'skip_download': True,
1242 # Kaltura embedded, some fileExt broken (#11480)
1243 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1247 'title': 'Our "Standard Models" of particle physics and cosmology',
1248 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1249 'timestamp': 1321158993,
1250 'upload_date': '20111113',
1251 'uploader_id': 'kps1',
1253 'add_ie': ['Kaltura'],
1256 # Kaltura iframe embed
1257 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1258 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1262 'title': 'I. M. Pei: A Centennial Celebration',
1263 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1264 'upload_date': '20170403',
1265 'uploader_id': 'batchUser',
1266 'timestamp': 1491232186,
1268 'add_ie': ['Kaltura'],
1270 # EaglePlatform embed (generic URL)
1272 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1273 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1277 'title': 'Навальный вышел на свободу',
1278 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1279 'thumbnail': r're:^https?://.*\.jpg$',
1285 'skip_download': True,
1288 # referrer protected EaglePlatform embed
1290 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1294 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1295 'thumbnail': r're:^https?://.*\.jpg$',
1300 'skip_download': True,
1303 # ClipYou (EaglePlatform) embed (custom URL)
1305 'url': 'http://muz-tv.ru/play/7129/',
1306 # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1310 'title': "'O Sole Mio",
1311 'thumbnail': r're:^https?://.*\.jpg$',
1316 'skip_download': True,
1318 'skip': 'This video is unavailable.',
1322 'url': 'http://muz-tv.ru/kinozal/view/7400/',
1326 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1327 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1328 'thumbnail': r're:^https?://.*\.jpg$',
1332 'skip': 'HTTP Error 404: Not Found',
1336 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1340 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1341 'thumbnail': r're:^https?://.*\.png$',
1347 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1348 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1352 'title': 'Facebook Creates "On This Day" | Crunch Report',
1353 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1354 'timestamp': 1427237531,
1355 'uploader': 'Crunch Report',
1356 'upload_date': '20150324',
1360 'skip_download': True,
1365 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1369 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1374 # Crooks and Liars embed
1376 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1380 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1381 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1382 'timestamp': 1428207000,
1383 'upload_date': '20150405',
1384 'uploader': 'Heather',
1387 # Crooks and Liars external embed
1389 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1391 'id': 'MTE3MjUtMzQ2MzA',
1393 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1394 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1395 'timestamp': 1265032391,
1396 'upload_date': '20100201',
1397 'uploader': 'Heather',
1400 # NBC Sports vplayer embed
1402 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1404 'id': 'ln7x1qSThw4k',
1406 'title': "PFT Live: New leader in the 'new-look' defense",
1407 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1408 'uploader': 'NBCU-SPORTS',
1409 'upload_date': '20140107',
1410 'timestamp': 1389118457,
1412 'skip': 'Invalid Page URL',
1416 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1417 'md5': '1aa589c675898ae6d37a17913cf68d66',
1419 'id': 'x_dtl_oa_LettermanliftPR_160608',
1421 'title': 'David Letterman: A Preview',
1422 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1423 'upload_date': '20160609',
1424 'timestamp': 1465431544,
1425 'uploader': 'NBCU-NEWS',
1430 'url': 'https://video.udn.com/news/300346',
1431 'md5': 'fd2060e988c326991037b9aff9df21a6',
1435 'title': '中一中男師變性 全校師生力挺',
1436 'thumbnail': r're:^https?://.*\.jpg$',
1440 'skip_download': True,
1442 'expected_warnings': ['Failed to parse JSON Expecting value'],
1446 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1448 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1450 'description': 'Index/Match versus VLOOKUP.',
1451 'title': 'This is what separates the Excel masters from the wannabes',
1452 'duration': 191.933,
1456 'skip_download': True,
1459 # Brightcove URL in single quotes
1461 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1462 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1464 'id': '4255764656001',
1466 'title': 'SN Presents: Russell Martin, World Citizen',
1467 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1468 'uploader': 'Rogers Sportsnet',
1469 'uploader_id': '1704050871',
1470 'upload_date': '20150525',
1471 'timestamp': 1432570283,
1474 # Dailymotion Cloud video
1476 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1477 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1481 'title': 'Sauvons les abeilles ! - Le débat',
1482 'description': 'md5:d9082128b1c5277987825d684939ca26',
1483 'thumbnail': r're:^https?://.*\.jpe?g$',
1484 'timestamp': 1434970506,
1485 'upload_date': '20150622',
1486 'uploader': 'Public Sénat',
1487 'uploader_id': 'xa9gza',
1489 'skip': 'File not found.',
1491 # OnionStudios embed
1493 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1497 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1498 'thumbnail': r're:^https?://.*\.jpe?g$',
1499 'uploader': 'ClickHole',
1500 'uploader_id': 'clickhole',
1505 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1507 'id': '74849a00-85a9-11e1-9660-123139220831',
1509 'title': '#whilewewatch',
1512 # AdobeTVVideo embed
1514 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1515 'md5': '43662b577c018ad707a63766462b1e87',
1519 'title': 'New experience with Acrobat DC',
1520 'description': 'New experience with Acrobat DC',
1521 'duration': 248.667,
1524 # BrightcoveInPageEmbed embed
1526 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1528 'id': '4238694884001',
1530 'title': 'Tabletop: Dread, Last Thoughts',
1531 'description': 'Tabletop: Dread, Last Thoughts',
1535 # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1536 # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1538 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1540 'id': '4785848093001',
1542 'title': 'The Cardinal Pell Interview',
1543 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1544 'uploader': 'GlobeCast Australia - GlobeStream',
1545 'uploader_id': '2733773828001',
1546 'upload_date': '20160304',
1547 'timestamp': 1457083087,
1551 'skip_download': True,
1555 # Brightcove embed with whitespace around attribute names
1556 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1558 'id': '3167554373001',
1560 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1561 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1562 'uploader_id': '1079349493',
1563 'upload_date': '20140207',
1564 'timestamp': 1391810548,
1567 'skip_download': True,
1570 # Another form of arte.tv embed
1572 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1573 'md5': '850bfe45417ddf221288c88a0cffe2e2',
1575 'id': '030273-562_PLUS7-F',
1577 'title': 'ARTE Reportage - Nulle part, en France',
1578 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1579 'upload_date': '20160409',
1584 'url': 'http://www.wykop.pl/link/3088787/',
1585 'md5': '7619da8c820e835bef21a1efa2a0fc71',
1587 'id': '874_1459135191',
1589 'title': 'Man shows poor quality of new apartment building',
1590 'description': 'The wall is like a sand pile.',
1591 'uploader': 'Lake8737',
1593 'add_ie': [LiveLeakIE.ie_key()],
1595 # Another LiveLeak embed pattern (#13336)
1597 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
1599 'id': '2eb_1496309988',
1601 'title': 'Thief robs place where everyone was armed',
1602 'description': 'md5:694d73ee79e535953cf2488562288eee',
1603 'uploader': 'brazilwtf',
1605 'add_ie': [LiveLeakIE.ie_key()],
1607 # Duplicated embedded video URLs
1609 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1611 'id': '149298443_480_16c25b74_2',
1613 'title': 'vs. Blue Orange Spring Game',
1614 'uploader': 'www.hudl.com',
1617 # twitter:player:stream embed
1619 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1623 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1624 'uploader': 'www.rtl.be',
1628 'skip_download': True,
1631 # twitter:player embed
1633 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1634 'md5': 'a3e0df96369831de324f0778e126653c',
1636 'id': '4909620399001',
1638 'title': 'What Do Black Holes Sound Like?',
1639 'description': 'what do black holes sound like',
1640 'upload_date': '20160524',
1641 'uploader_id': '29913724001',
1642 'timestamp': 1464107587,
1643 'uploader': 'TheAtlantic',
1645 'add_ie': ['BrightcoveLegacy'],
1647 # Facebook <iframe> embed
1649 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1650 'md5': 'fbcde74f534176ecb015849146dd3aee',
1652 'id': '599637780109885',
1654 'title': 'Facebook video #599637780109885',
1657 # Facebook <iframe> embed, plugin video
1659 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1661 'id': '1754168231264132',
1663 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1664 'uploader': 'Tariq Ramadan (official)',
1665 'timestamp': 1496758379,
1666 'upload_date': '20170606',
1669 'skip_download': True,
1672 # Facebook API embed
1674 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1675 'md5': 'a47372ee61b39a7b90287094d447d94e',
1677 'id': '10153467542406923',
1679 'title': 'Facebook video #10153467542406923',
1682 # Wordpress "YouTube Video Importer" plugin
1684 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1685 'md5': 'd16797741b560b485194eddda8121b48',
1687 'id': 'HNTXWDXV9Is',
1689 'title': 'Blue Devils Drumline Stanford lot 2016',
1690 'upload_date': '20160627',
1691 'uploader_id': 'GENOCIDE8GENERAL10',
1692 'uploader': 'cylus cyrus',
1696 # video stored on custom kaltura server
1697 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1698 'md5': '537617d06e64dfed891fa1593c4b30cc',
1702 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1703 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1704 'uploader_id': 'videos.expansion@el-mundo.net',
1705 'upload_date': '20150429',
1706 'timestamp': 1430303472,
1708 'add_ie': ['Kaltura'],
1711 # Non-standard Vimeo embed
1712 'url': 'https://openclassrooms.com/courses/understanding-the-web',
1713 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1717 'title': 'Understanding the web - Teaser',
1718 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1719 'upload_date': '20151214',
1720 'uploader': 'OpenClassrooms',
1721 'uploader_id': 'openclassrooms',
1723 'add_ie': ['Vimeo'],
1726 # generic vimeo embed that requires original URL passed as Referer
1727 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1728 'only_matching': True,
1731 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1732 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1734 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1736 'title': 'Big Buck Bunny',
1737 'description': 'Royalty free test video',
1738 'timestamp': 1432816365,
1739 'upload_date': '20150528',
1743 'skip_download': True,
1745 'add_ie': [ArkenaIE.ie_key()],
1748 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1752 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1755 'skip_download': True,
1757 'add_ie': [Vbox7IE.ie_key()],
1761 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1764 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1766 'playlist_mincount': 3,
1770 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1772 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1773 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1775 'playlist_mincount': 2,
1779 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1783 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1784 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1787 'skip_download': True,
1789 'add_ie': [TwentyMinutenIE.ie_key()],
1793 'url': 'https://en.support.wordpress.com/videopress/',
1797 'title': 'IMG_5786',
1798 'timestamp': 1435711927,
1799 'upload_date': '20150701',
1802 'skip_download': True,
1804 'add_ie': [VideoPressIE.ie_key()],
1808 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1810 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1812 'title': 'Магаззино: Казань 2',
1813 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1814 'uploader': 'Магаззино',
1815 'upload_date': '20170228',
1816 'uploader_id': '996642',
1819 'skip_download': True,
1821 'add_ie': [RutubeIE.ie_key()],
1824 # ThePlatform embedded with whitespaces in URLs
1825 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1826 'only_matching': True,
1829 # Senate ISVP iframe https
1830 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1831 'md5': 'fb8c70b0b515e5037981a2492099aab8',
1833 'id': 'govtaff020316',
1835 'title': 'Integrated Senate Video Player',
1837 'add_ie': [SenateISVPIE.ie_key()],
1840 # Limelight embeds (1 channel embed + 4 media embeds)
1841 'url': 'http://www.sedona.com/FacilitatorTraining2017',
1843 'id': 'FacilitatorTraining2017',
1844 'title': 'Facilitator Training 2017',
1846 'playlist_mincount': 5,
1849 # Limelight embed (LimelightPlayerUtil.embed)
1850 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1852 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1854 'title': '07448641',
1855 'timestamp': 1499890639,
1856 'upload_date': '20170712',
1859 'skip_download': True,
1861 'add_ie': ['LimelightMedia'],
1864 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1866 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1867 'title': 'Standoff with Walnut Creek murder suspect ends',
1868 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1870 'playlist_mincount': 4,
1873 # WashingtonPost embed
1874 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1876 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1878 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1879 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1880 'timestamp': 1455216756,
1881 'uploader': 'The Washington Post',
1882 'upload_date': '20160211',
1884 'add_ie': [WashingtonPostIE.ie_key()],
1888 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1892 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1895 'skip_download': True,
1897 'add_ie': [MediasetIE.ie_key()],
1901 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1903 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1904 'title': 'Slovenskom sa prehnala vlna silných búrok',
1906 'playlist_mincount': 5,
1907 'add_ie': [JojIE.ie_key()],
1910 # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
1911 'url': 'https://tvrain.ru/amp/418921/',
1912 'md5': 'cc00413936695987e8de148b67d14f1d',
1916 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1921 'url': 'http://help.vzaar.com/article/165-embedding-video',
1922 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
1926 'title': 'Building A Business Online: Principal Chairs Q & A',
1930 # multiple HTML5 videos on one page
1931 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
1933 'id': 'keyscenarios',
1934 'title': 'Rescue Kit 14 Free Edition - Getting started',
1936 'playlist_count': 4,
1940 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
1941 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
1944 'title': 'vl14062007715967',
1949 # # TODO: find another test
1950 # # http://schema.org/VideoObject
1951 # 'url': 'https://flipagram.com/f/nyvTSJMKId',
1952 # 'md5': '888dcf08b7ea671381f00fab74692755',
1954 # 'id': 'nyvTSJMKId',
1956 # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1957 # 'description': '#love for cats.',
1958 # 'timestamp': 1461244995,
1959 # 'upload_date': '20160421',
1962 # 'force_generic_extractor': True,
def report_following_redirect(self, new_url):
    """Print a '[redirect]' notice naming the URL that is followed next.

    new_url -- the redirect target; it is interpolated into the message
               with %-formatting and handed to the downloader's to_screen().
    """
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
1971 def _extract_rss(self, url, video_id, doc):
1972 playlist_title = doc.find('./channel/title').text
1973 playlist_desc_el = doc.find('./channel/description')
1974 playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1977 for it in doc.findall('./channel/item'):
1978 next_url = xpath_text(it, 'link', fatal=False)
1980 enclosure_nodes = it.findall('./enclosure')
1981 for e in enclosure_nodes:
1982 next_url = e.attrib.get('url')
1990 '_type': 'url_transparent',
1992 'title': it.find('title').text,
1996 '_type': 'playlist',
1998 'title': playlist_title,
1999 'description': playlist_desc,
2003 def _extract_camtasia(self, url, video_id, webpage):
2004 """ Returns None if no camtasia video can be found. """
2006 camtasia_cfg = self._search_regex(
2007 r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
2008 webpage, 'camtasia configuration file', default=None)
2009 if camtasia_cfg is None:
2012 title = self._html_search_meta('DC.title', webpage, fatal=True)
2014 camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
2015 camtasia_cfg = self._download_xml(
2016 camtasia_url, video_id,
2017 note='Downloading camtasia configuration',
2018 errnote='Failed to download camtasia configuration')
2019 fileset_node = camtasia_cfg.find('./playlist/array/fileset')
2022 for n in fileset_node.getchildren():
2023 url_n = n.find('./uri')
2028 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
2029 'title': '%s - %s' % (title, n.tag),
2030 'url': compat_urlparse.urljoin(url, url_n.text),
2031 'duration': float_or_none(n.find('./duration').text),
2035 '_type': 'playlist',
2040 def _real_extract(self, url):
2041 if url.startswith('//'):
2044 'url': self.http_scheme() + url,
2047 parsed_url = compat_urlparse.urlparse(url)
2048 if not parsed_url.scheme:
2049 default_search = self._downloader.params.get('default_search')
2050 if default_search is None:
2051 default_search = 'fixup_error'
2053 if default_search in ('auto', 'auto_warning', 'fixup_error'):
2055 self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
2056 return self.url_result('http://' + url)
2057 elif default_search != 'fixup_error':
2058 if default_search == 'auto_warning':
2059 if re.match(r'^(?:url|URL)$', url):
2060 raise ExtractorError(
2061 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
2064 self._downloader.report_warning(
2065 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
2066 return self.url_result('ytsearch:' + url)
2068 if default_search in ('error', 'fixup_error'):
2069 raise ExtractorError(
2070 '%r is not a valid URL. '
2071 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
2072 % (url, url), expected=True)
2074 if ':' not in default_search:
2075 default_search += ':'
2076 return self.url_result(default_search + url)
2078 url, smuggled_data = unsmuggle_url(url)
2079 force_videoid = None
2080 is_intentional = smuggled_data and smuggled_data.get('to_generic')
2081 if smuggled_data and 'force_videoid' in smuggled_data:
2082 force_videoid = smuggled_data['force_videoid']
2083 video_id = force_videoid
2085 video_id = self._generic_id(url)
2087 self.to_screen('%s: Requesting header' % video_id)
2089 head_req = HEADRequest(url)
2090 head_response = self._request_webpage(
2092 note=False, errnote='Could not send HEAD request to %s' % url,
2095 if head_response is not False:
2096 # Check for redirect
2097 new_url = compat_str(head_response.geturl())
2099 self.report_following_redirect(new_url)
2101 new_url = smuggle_url(
2102 new_url, {'force_videoid': force_videoid})
2103 return self.url_result(new_url)
2105 full_response = None
2106 if head_response is False:
2107 request = sanitized_Request(url)
2108 request.add_header('Accept-Encoding', '*')
2109 full_response = self._request_webpage(request, video_id)
2110 head_response = full_response
2114 'title': self._generic_title(url),
2115 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
2118 # Check for direct link to a video
2119 content_type = head_response.headers.get('Content-Type', '').lower()
2120 m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
2122 format_id = compat_str(m.group('format_id'))
2123 if format_id.endswith('mpegurl'):
2124 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
2125 elif format_id == 'f4m':
2126 formats = self._extract_f4m_formats(url, video_id)
2129 'format_id': format_id,
2131 'vcodec': 'none' if m.group('type') == 'audio' else None
2133 info_dict['direct'] = True
2134 self._sort_formats(formats)
2135 info_dict['formats'] = formats
2138 if not self._downloader.params.get('test', False) and not is_intentional:
2139 force = self._downloader.params.get('force_generic_extractor', False)
2140 self._downloader.report_warning(
2141 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
2143 if not full_response:
2144 request = sanitized_Request(url)
2145 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
2146 # making it impossible to download only chunk of the file (yet we need only 512kB to
2147 # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
2148 # that will always result in downloading the whole file that is not desirable.
2149 # Therefore for extraction pass we have to override Accept-Encoding to any in order
2150 # to accept raw bytes and being able to download only a chunk.
2151 # It may probably better to solve this by checking Content-Type for application/octet-stream
2152 # after HEAD request finishes, but not sure if we can rely on this.
2153 request.add_header('Accept-Encoding', '*')
2154 full_response = self._request_webpage(request, video_id)
2156 first_bytes = full_response.read(512)
2158 # Is it an M3U playlist?
2159 if first_bytes.startswith(b'#EXTM3U'):
2160 info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
2161 self._sort_formats(info_dict['formats'])
2164 # Maybe it's a direct link to a video?
2165 # Be careful not to download the whole thing!
2166 if not is_html(first_bytes):
2167 self._downloader.report_warning(
2168 'URL could be a direct video link, returning it as such.')
2175 webpage = self._webpage_read_content(
2176 full_response, url, video_id, prefix=first_bytes)
2178 self.report_extraction(video_id)
2180 # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
2182 doc = compat_etree_fromstring(webpage.encode('utf-8'))
2183 if doc.tag == 'rss':
2184 return self._extract_rss(url, video_id, doc)
2185 elif doc.tag == 'SmoothStreamingMedia':
2186 info_dict['formats'] = self._parse_ism_formats(doc, url)
2187 self._sort_formats(info_dict['formats'])
2189 elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
2190 smil = self._parse_smil(doc, url, video_id)
2191 self._sort_formats(smil['formats'])
2193 elif doc.tag == '{http://xspf.org/ns/0/}playlist':
2194 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
2195 elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
2196 info_dict['formats'] = self._parse_mpd_formats(
2198 mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
2200 self._sort_formats(info_dict['formats'])
2202 elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2203 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
2204 self._sort_formats(info_dict['formats'])
2206 except compat_xml_parse_error:
2209 # Is it a Camtasia project?
2210 camtasia_res = self._extract_camtasia(url, video_id, webpage)
2211 if camtasia_res is not None:
2214 # Sometimes embedded video player is hidden behind percent encoding
2215 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
2216 # Unescaping the whole page allows to handle those cases in a generic way
2217 webpage = compat_urllib_parse_unquote(webpage)
2219 # it's tempting to parse this further, but you would
2220 # have to take into account all the variations like
2221 # Video Title - Site Name
2222 # Site Name | Video Title
2223 # Video Title - Tagline | Site Name
2224 # and so on and so forth; it's just not practical
2225 video_title = self._og_search_title(
2226 webpage, default=None) or self._html_search_regex(
2227 r'(?s)<title>(.*?)</title>', webpage, 'video title',
2230 # Try to detect age limit automatically
2231 age_limit = self._rta_search(webpage)
2232 # And then there are the jokers who advertise that they use RTA,
2233 # but actually don't.
2234 AGE_LIMIT_MARKERS = [
2235 r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
2237 if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2240 # video uploader is domain name
2241 video_uploader = self._search_regex(
2242 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
2244 video_description = self._og_search_description(webpage, default=None)
2245 video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2248 'title': video_title,
2249 'description': video_description,
2250 'thumbnail': video_thumbnail,
2251 'age_limit': age_limit,
2254 # Look for Brightcove Legacy Studio embeds
2255 bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
2259 'url': smuggle_url(bc_url, {'Referer': url}),
2260 'ie_key': 'BrightcoveLegacy'
2261 } for bc_url in bc_urls]
2264 '_type': 'playlist',
2265 'title': video_title,
2270 # Look for Brightcove New Studio embeds
2271 bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
2273 return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
2275 # Look for Nexx embeds
2276 nexx_urls = NexxIE._extract_urls(webpage)
2278 return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2280 # Look for Nexx iFrame embeds
2281 nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2283 return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2285 # Look for ThePlatform embeds
2286 tp_urls = ThePlatformIE._extract_urls(webpage)
2288 return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
2290 # Look for Vessel embeds
2291 vessel_urls = VesselIE._extract_urls(webpage)
2293 return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
2295 # Look for embedded rtl.nl player
2296 matches = re.findall(
2297 r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
2300 return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
2302 vimeo_urls = VimeoIE._extract_urls(url, webpage)
2304 return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
2306 vid_me_embed_url = self._search_regex(
2307 r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
2308 webpage, 'vid.me embed', default=None)
2309 if vid_me_embed_url is not None:
2310 return self.url_result(vid_me_embed_url, 'Vidme')
2312 # Look for YouTube embeds
2313 youtube_urls = YoutubeIE._extract_urls(webpage)
2315 return self.playlist_from_matches(
2316 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
2318 matches = DailymotionIE._extract_urls(webpage)
2320 return self.playlist_from_matches(matches, video_id, video_title)
2322 # Look for embedded Dailymotion playlist player (#3822)
2324 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2326 playlists = re.findall(
2327 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2329 return self.playlist_from_matches(
2330 playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
2332 # Look for DailyMail embeds
2333 dailymail_urls = DailyMailIE._extract_urls(webpage)
2335 return self.playlist_from_matches(
2336 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2338 # Look for embedded Wistia player
2339 wistia_url = WistiaIE._extract_url(webpage)
2342 '_type': 'url_transparent',
2343 'url': self._proto_relative_url(wistia_url),
2344 'ie_key': WistiaIE.ie_key(),
2345 'uploader': video_uploader,
2348 # Look for SVT player
2349 svt_url = SVTIE._extract_url(webpage)
2351 return self.url_result(svt_url, 'SVT')
2353 # Look for Bandcamp pages with custom domain
2354 mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2355 if mobj is not None:
2356 burl = unescapeHTML(mobj.group(1))
2357 # Don't set the extractor because it can be a track url or an album
2358 return self.url_result(burl)
2360 # Look for embedded Vevo player
2362 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2363 if mobj is not None:
2364 return self.url_result(mobj.group('url'))
2366 # Look for embedded Viddler player
2368 r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2370 if mobj is not None:
2371 return self.url_result(mobj.group('url'))
2373 # Look for NYTimes player
2375 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2377 if mobj is not None:
2378 return self.url_result(mobj.group('url'))
2380 # Look for Libsyn player
2382 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2383 if mobj is not None:
2384 return self.url_result(mobj.group('url'))
2386 # Look for Ooyala videos
2387 mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2388 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2389 re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2390 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2391 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2392 if mobj is not None:
2393 embed_token = self._search_regex(
2394 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2395 webpage, 'ooyala embed token', default=None)
2396 return OoyalaIE._build_url_result(smuggle_url(
2399 'embed_token': embed_token,
2402 # Look for multiple Ooyala embeds on SBN network websites
2403 mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2404 if mobj is not None:
2405 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2407 return self.playlist_from_matches(
2408 embeds, video_id, video_title,
2409 getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2411 # Look for Aparat videos
2412 mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2413 if mobj is not None:
2414 return self.url_result(mobj.group(1), 'Aparat')
2416 # Look for MPORA videos
2417 mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2418 if mobj is not None:
2419 return self.url_result(mobj.group(1), 'Mpora')
2421 # Look for embedded NovaMov-based player
2423 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2424 (?P<url>http://(?:(?:embed|www)\.)?
2426 nowvideo\.(?:ch|sx|eu|at|ag|co)|
2427 videoweed\.(?:es|com)|
2428 movshare\.(?:net|sx|ag)|
2429 divxstage\.(?:eu|net|ch|co|at|ag))
2430 /embed\.php.+?)\1''', webpage)
2431 if mobj is not None:
2432 return self.url_result(mobj.group('url'))
2434 # Look for embedded Facebook player
2435 facebook_urls = FacebookIE._extract_urls(webpage)
2437 return self.playlist_from_matches(facebook_urls, video_id, video_title)
2439 # Look for embedded VK player
2440 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2441 if mobj is not None:
2442 return self.url_result(mobj.group('url'), 'VK')
2444 # Look for embedded Odnoklassniki player
2445 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2446 if mobj is not None:
2447 return self.url_result(mobj.group('url'), 'Odnoklassniki')
2449 # Look for embedded ivi player
2450 mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2451 if mobj is not None:
2452 return self.url_result(mobj.group('url'), 'Ivi')
2454 # Look for embedded Huffington Post player
2456 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2457 if mobj is not None:
2458 return self.url_result(mobj.group('url'), 'HuffPost')
2461 mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2462 if mobj is not None:
2463 return self.url_result(mobj.group('url'))
2464 mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2465 if mobj is not None:
2466 return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2468 # Look for funnyordie embed
2469 matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2471 return self.playlist_from_matches(
2472 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2474 # Look for BBC iPlayer embed
2475 matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2477 return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2479 # Look for embedded RUTV player
2480 rutv_url = RUTVIE._extract_url(webpage)
2482 return self.url_result(rutv_url, 'RUTV')
2484 # Look for embedded TVC player
2485 tvc_url = TVCIE._extract_url(webpage)
2487 return self.url_result(tvc_url, 'TVC')
2489 # Look for embedded SportBox player
2490 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2492 return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2494 # Look for embedded XHamster player
2495 xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2497 return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2499 # Look for embedded TNAFlixNetwork player
2500 tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2502 return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2504 # Look for embedded PornHub player
2505 pornhub_urls = PornHubIE._extract_urls(webpage)
2507 return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2509 # Look for embedded DrTuber player
2510 drtuber_urls = DrTuberIE._extract_urls(webpage)
2512 return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2514 # Look for embedded RedTube player
2515 redtube_urls = RedTubeIE._extract_urls(webpage)
2517 return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2519 # Look for embedded Tvigle player
2521 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2522 if mobj is not None:
2523 return self.url_result(mobj.group('url'), 'Tvigle')
2525 # Look for embedded TED player
2527 r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2528 if mobj is not None:
2529 return self.url_result(mobj.group('url'), 'TED')
2531 # Look for embedded Ustream videos
2532 ustream_url = UstreamIE._extract_url(webpage)
2534 return self.url_result(ustream_url, UstreamIE.ie_key())
2536 # Look for embedded arte.tv player
2538 r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2540 if mobj is not None:
2541 return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2543 # Look for embedded francetv player
2545 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2547 if mobj is not None:
2548 return self.url_result(mobj.group('url'))
2550 # Look for embedded smotri.com player
2551 smotri_url = SmotriIE._extract_url(webpage)
2553 return self.url_result(smotri_url, 'Smotri')
2555 # Look for embedded Myvi.ru player
2556 myvi_url = MyviIE._extract_url(webpage)
2558 return self.url_result(myvi_url)
2560 # Look for embedded soundcloud player
2561 soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2563 return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2565 # Look for tunein player
2566 tunein_urls = TuneInBaseIE._extract_urls(webpage)
2568 return self.playlist_from_matches(tunein_urls, video_id, video_title)
2570 # Look for embedded mtvservices player
2571 mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2573 return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2575 # Look for embedded yahoo player
2577 r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2579 if mobj is not None:
2580 return self.url_result(mobj.group('url'), 'Yahoo')
2582 # Look for embedded sbs.com.au player
2586 <meta\s+property="og:video"\s+content=|
2589 (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2591 if mobj is not None:
2592 return self.url_result(mobj.group('url'), 'SBS')
2594 # Look for embedded Cinchcast player
2596 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2598 if mobj is not None:
2599 return self.url_result(mobj.group('url'), 'Cinchcast')
2602 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2606 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2608 if mobj is not None:
2609 return self.url_result(mobj.group('url'), 'MLB')
2612 r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2614 if mobj is not None:
2615 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2618 r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2620 if mobj is not None:
2621 return self.url_result(mobj.group('url'), 'Livestream')
2623 # Look for Zapiks embed
2625 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2626 if mobj is not None:
2627 return self.url_result(mobj.group('url'), 'Zapiks')
2629 # Look for Kaltura embeds
2630 kaltura_url = KalturaIE._extract_url(webpage)
2632 return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2634 # Look for EaglePlatform embeds
2635 eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2636 if eagleplatform_url:
2637 return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
2639 # Look for ClipYou (uses EaglePlatform) embeds
2641 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2642 if mobj is not None:
2643 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2645 # Look for Pladform embeds
2646 pladform_url = PladformIE._extract_url(webpage)
2648 return self.url_result(pladform_url)
2650 # Look for Videomore embeds
2651 videomore_url = VideomoreIE._extract_url(webpage)
2653 return self.url_result(videomore_url)
2655 # Look for Webcaster embeds
2656 webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2658 return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2660 # Look for Playwire embeds
2662 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2663 if mobj is not None:
2664 return self.url_result(mobj.group('url'))
2666 # Look for 5min embeds
2668 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2669 if mobj is not None:
2670 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2672 # Look for Crooks and Liars embeds
2674 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2675 if mobj is not None:
2676 return self.url_result(mobj.group('url'))
2678 # Look for NBC Sports VPlayer embeds
2679 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2681 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2683 # Look for NBC News embeds
2684 nbc_news_embed_url = re.search(
2685 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2686 if nbc_news_embed_url:
2687 return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2689 # Look for Google Drive embeds
2690 google_drive_url = GoogleDriveIE._extract_url(webpage)
2691 if google_drive_url:
2692 return self.url_result(google_drive_url, 'GoogleDrive')
2694 # Look for UDN embeds
2696 r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2697 if mobj is not None:
2698 return self.url_result(
2699 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2701 # Look for Senate ISVP iframe
2702 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2704 return self.url_result(senate_isvp_url, 'SenateISVP')
2706 # Look for Dailymotion Cloud videos
2707 dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2709 return self.url_result(dmcloud_url, 'DailymotionCloud')
2711 # Look for OnionStudios embeds
2712 onionstudios_url = OnionStudiosIE._extract_url(webpage)
2713 if onionstudios_url:
2714 return self.url_result(onionstudios_url)
2716 # Look for ViewLift embeds
2717 viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2719 return self.url_result(viewlift_url)
2721 # Look for JWPlatform embeds
2722 jwplatform_url = JWPlatformIE._extract_url(webpage)
2724 return self.url_result(jwplatform_url, 'JWPlatform')
2726 # Look for Digiteka embeds
2727 digiteka_url = DigitekaIE._extract_url(webpage)
2729 return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2731 # Look for Arkena embeds
2732 arkena_url = ArkenaIE._extract_url(webpage)
2734 return self.url_result(arkena_url, ArkenaIE.ie_key())
2736 # Look for Piksel embeds
2737 piksel_url = PikselIE._extract_url(webpage)
2739 return self.url_result(piksel_url, PikselIE.ie_key())
2741 # Look for Limelight embeds
2742 limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
2744 return self.playlist_result(
2745 limelight_urls, video_id, video_title, video_description)
2747 # Look for Anvato embeds
2748 anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
2750 return self.playlist_result(
2751 anvato_urls, video_id, video_title, video_description)
2753 # Look for AdobeTVVideo embeds
2755 r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2757 if mobj is not None:
2758 return self.url_result(
2759 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2762 # Look for Vine embeds
2764 r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2766 if mobj is not None:
2767 return self.url_result(
2768 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2770 # Look for VODPlatform embeds
2772 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2774 if mobj is not None:
2775 return self.url_result(
2776 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2778 # Look for Mangomolo embeds
2780 r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2782 video\?.*?\bid=(?P<video_id>\d+)|
2783 index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2784 ).+?)\1''', webpage)
2785 if mobj is not None:
2787 '_type': 'url_transparent',
2788 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2789 'title': video_title,
2790 'description': video_description,
2791 'thumbnail': video_thumbnail,
2792 'uploader': video_uploader,
2794 video_id = mobj.group('video_id')
2797 'ie_key': 'MangomoloVideo',
2802 'ie_key': 'MangomoloLive',
2803 'id': mobj.group('channel_id'),
2807 # Look for Instagram embeds
2808 instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2809 if instagram_embed_url is not None:
2810 return self.url_result(
2811 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2813 # Look for LiveLeak embeds
2814 liveleak_urls = LiveLeakIE._extract_urls(webpage)
2816 return self.playlist_from_matches(liveleak_urls, video_id, video_title)
2818 # Look for 3Q SDN embeds
2819 threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2822 '_type': 'url_transparent',
2823 'ie_key': ThreeQSDNIE.ie_key(),
2824 'url': self._proto_relative_url(threeqsdn_url),
2825 'title': video_title,
2826 'description': video_description,
2827 'thumbnail': video_thumbnail,
2828 'uploader': video_uploader,
2831 # Look for VBOX7 embeds
2832 vbox7_url = Vbox7IE._extract_url(webpage)
2834 return self.url_result(vbox7_url, Vbox7IE.ie_key())
2836 # Look for DBTV embeds
2837 dbtv_urls = DBTVIE._extract_urls(webpage)
2839 return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2841 # Look for Videa embeds
2842 videa_urls = VideaIE._extract_urls(webpage)
2844 return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2846 # Look for 20 minuten embeds
2847 twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2849 return self.playlist_from_matches(
2850 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2852 # Look for Openload embeds
2853 openload_urls = OpenloadIE._extract_urls(webpage)
2855 return self.playlist_from_matches(
2856 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2858 # Look for VideoPress embeds
2859 videopress_urls = VideoPressIE._extract_urls(webpage)
2861 return self.playlist_from_matches(
2862 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2864 # Look for Rutube embeds
2865 rutube_urls = RutubeIE._extract_urls(webpage)
2867 return self.playlist_from_matches(
2868 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
2870 # Look for WashingtonPost embeds
2871 wapo_urls = WashingtonPostIE._extract_urls(webpage)
2873 return self.playlist_from_matches(
2874 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
2876 # Look for Mediaset embeds
2877 mediaset_urls = MediasetIE._extract_urls(webpage)
2879 return self.playlist_from_matches(
2880 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
2882 # Look for JOJ.sk embeds
2883 joj_urls = JojIE._extract_urls(webpage)
2885 return self.playlist_from_matches(
2886 joj_urls, video_id, video_title, ie=JojIE.ie_key())
2888 # Look for megaphone.fm embeds
2889 mpfn_urls = MegaphoneIE._extract_urls(webpage)
2891 return self.playlist_from_matches(
2892 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
2894 # Look for vzaar embeds
2895 vzaar_urls = VzaarIE._extract_urls(webpage)
2897 return self.playlist_from_matches(
2898 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
2900 channel9_urls = Channel9IE._extract_urls(webpage)
2902 return self.playlist_from_matches(
2903 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
2905 vshare_urls = VShareIE._extract_urls(webpage)
2907 return self.playlist_from_matches(
2908 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
2910 def merge_dicts(dict1, dict2):
2912 for k, v in dict1.items():
2915 for k, v in dict2.items():
2918 if (k not in merged or
2919 (isinstance(v, compat_str) and v and
2920 isinstance(merged[k], compat_str) and
2925 # Look for HTML5 media
2926 entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2928 if len(entries) == 1:
2931 'title': video_title,
2934 for num, entry in enumerate(entries, start=1):
2936 'id': '%s-%s' % (video_id, num),
2937 'title': '%s (%d)' % (video_title, num),
2939 for entry in entries:
2940 self._sort_formats(entry['formats'])
2941 return self.playlist_result(entries, video_id, video_title)
2943 jwplayer_data = self._find_jwplayer_data(
2944 webpage, video_id, transform_source=js_to_json)
2946 info = self._parse_jwplayer_data(
2947 jwplayer_data, video_id, require_title=False, base_url=url)
2948 return merge_dicts(info, info_dict)
2952 r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
2954 if mobj is not None:
2955 sources = self._parse_json(
2956 mobj.group(1), video_id, transform_source=js_to_json,
2958 if not isinstance(sources, list):
2961 for source in sources:
2962 src = source.get('src')
2963 if not src or not isinstance(src, compat_str):
2965 src = compat_urlparse.urljoin(url, src)
2966 src_type = source.get('type')
2967 if isinstance(src_type, compat_str):
2968 src_type = src_type.lower()
2969 ext = determine_ext(src).lower()
2970 if src_type == 'video/youtube':
2971 return self.url_result(src, YoutubeIE.ie_key())
2972 if src_type == 'application/dash+xml' or ext == 'mpd':
2973 formats.extend(self._extract_mpd_formats(
2974 src, video_id, mpd_id='dash', fatal=False))
2975 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
2976 formats.extend(self._extract_m3u8_formats(
2977 src, video_id, 'mp4', entry_protocol='m3u8_native',
2978 m3u8_id='hls', fatal=False))
2982 'ext': (mimetype2ext(src_type) or
2983 ext if ext in KNOWN_EXTENSIONS else 'mp4'),
2986 self._sort_formats(formats)
2987 info_dict['formats'] = formats
2990 # Looking for http://schema.org/VideoObject
2991 json_ld = self._search_json_ld(
2992 webpage, video_id, default={}, expected_type='VideoObject')
2993 if json_ld.get('url'):
2994 return merge_dicts(json_ld, info_dict)
2996 def check_video(vurl):
2997 if YoutubeIE.suitable(vurl):
2999 if RtmpIE.suitable(vurl):
3001 vpath = compat_urlparse.urlparse(vurl).path
3002 vext = determine_ext(vpath)
3003 return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
def filter_video(urls):
    """Return a list of the entries of `urls` that `check_video` accepts, in their original order."""
    return [candidate for candidate in urls if check_video(candidate)]
3008 # Start with something easy: JW Player in SWFObject
3009 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
3011 # Look for gorilla-vid style embedding
3012 found = filter_video(re.findall(r'''(?sx)
3016 jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3019 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
3021 # Broaden the search a little bit
3022 found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
3024 # Broaden the findall a little bit: JWPlayer JS loader
3025 found = filter_video(re.findall(
3026 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
3029 found = filter_video(re.findall(r'''(?xs)
3030 flowplayer\("[^"]+",\s*
3032 \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
3033 ["']?url["']?\s*:\s*["']([^"']+)["']
3038 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
3040 # Try to find twitter cards info
3041 # twitter:player:stream should be checked before twitter:player since
3042 # it is expected to contain a raw stream (see
3043 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3044 found = filter_video(re.findall(
3045 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
3047 # We look for Open Graph info:
3048 # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
3049 m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
3050 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3051 if m_video_type is not None:
3052 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
3054 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
3056 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
3057 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
3060 # Look also in Refresh HTTP header
3061 refresh_header = head_response.headers.get('Refresh')
3063 # In python 2 response HTTP headers are bytestrings
3064 if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3065 refresh_header = refresh_header.decode('iso-8859-1')
3066 found = re.search(REDIRECT_REGEX, refresh_header)
3068 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
3070 self.report_following_redirect(new_url)
3079 # twitter:player is an HTTPS URL to an iframe player that may or may not
3080 # be supported by youtube-dl, so it is checked last (see
3081 # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3082 embed_url = self._html_search_meta('twitter:player', webpage, default=None)
3083 if embed_url and embed_url != url:
3084 return self.url_result(embed_url)
3087 raise UnsupportedError(url)
3090 for video_url in orderedSet(found):
3091 video_url = unescapeHTML(video_url)
3092 video_url = video_url.replace('\\/', '/')
3093 video_url = compat_urlparse.urljoin(url, video_url)
3094 video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
3096 # Sometimes, jwplayer extraction will result in a YouTube URL
3097 if YoutubeIE.suitable(video_url):
3098 entries.append(self.url_result(video_url, 'Youtube'))
3101 # here's a fun little line of code for you:
3102 video_id = os.path.splitext(video_id)[0]
3106 'uploader': video_uploader,
3107 'title': video_title,
3108 'age_limit': age_limit,
3111 if RtmpIE.suitable(video_url):
3112 entry_info_dict.update({
3113 '_type': 'url_transparent',
3114 'ie_key': RtmpIE.ie_key(),
3117 entries.append(entry_info_dict)
3120 ext = determine_ext(video_url)
3122 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
3124 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
3126 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
3128 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3130 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
3131 elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
3132 # Just matching .ism/manifest is not enough to be reliably sure
3133 # whether it's actually an ISM manifest or some other streaming
3134 # manifest since there are various streaming URL formats
3135 # possible (see [1]) as well as some other shenanigans like
3136 # .smil/manifest URLs that actually serve an ISM (see [2]) and
3138 # Thus the most reasonable way to solve this is to delegate
3139 # to generic extractor in order to look into the contents of
3140 # the manifest itself.
3141 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
3142 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
3143 entry_info_dict = self.url_result(
3144 smuggle_url(video_url, {'to_generic': True}),
3147 entry_info_dict['url'] = video_url
3149 if entry_info_dict.get('formats'):
3150 self._sort_formats(entry_info_dict['formats'])
3152 entries.append(entry_info_dict)
3154 if len(entries) == 1:
3157 for num, e in enumerate(entries, start=1):
3158 # 'url' results don't have a title
3159 if e.get('title') is not None:
3160 e['title'] = '%s (%d)' % (e['title'], num)
3162 '_type': 'playlist',