Merge pull request #6259 from remitamine/howstuffworks

author Sergey M. <dstftw@gmail.com>

Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)

committer Sergey M. <dstftw@gmail.com>

Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)
author Sergey M. <dstftw@gmail.com>
Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)
committer Sergey M. <dstftw@gmail.com>
Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)
diff --git a/AUTHORS b/AUTHORS

index 889d599a2d113b83dc505a664207d7258398fda6..531ec5767c9b976bebfc77c760e7e971f405f9be 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -128,3 +128,7 @@ Ping O.
  Mister Hat
  Peter Ding
  jackyzy823
+George Brighton
+Remita Amine
+Aurélio A. Heckert
+Bernhard Minks
diff --git a/README.md b/README.md

index 5f3a08f5a9839e2438498cc32d284b341c4fb7f8..a2cc89cdb392cddb9dcd2a552ed2c3d864973cfb 100644 (file)
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ which means you can modify it, redistribute it or use it however you like.
      --dump-user-agent                Display the current browser identification
      --list-extractors                List all supported extractors
      --extractor-descriptions         Output descriptions of all supported extractors
+    --force-generic-extractor        Force extraction to use the generic extractor
      --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
                                       Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
                                       default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
@@ -107,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
      --playlist-reverse               Download playlist videos in reverse order
      --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental)
      --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental)
-    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,curl,wget
+    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
      --external-downloader-args ARGS  Give these arguments to the external downloader
  
  ## Filesystem Options:
@@ -189,8 +190,8 @@ which means you can modify it, redistribute it or use it however you like.
      --all-formats                    Download all available video formats
      --prefer-free-formats            Prefer free video formats unless a specific one is requested
      -F, --list-formats               List all available formats
-    --youtube-skip-dash-manifest     Do not download the DASH manifest on YouTube videos
-    --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
+    --youtube-skip-dash-manifest     Do not download the DASH manifests and related data on YouTube videos
+    --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
                                       merge is required
  
  ## Subtitle Options:
@@ -213,7 +214,8 @@ which means you can modify it, redistribute it or use it however you like.
      --audio-format FORMAT            Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
      --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
                                       5)
-    --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+    --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
+    --postprocessor-args ARGS        Give these arguments to the postprocessor
      -k, --keep-video                 Keep the video file on disk after the post-processing; the video is erased by default
      --no-post-overwrites             Do not overwrite post-processed files; the post-processed files are overwritten by default
      --embed-subs                     Embed subtitles in the video (only for mkv and mp4 videos)
@@ -236,6 +238,26 @@ which means you can modify it, redistribute it or use it however you like.
  
  You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
  
+### Authentication with `.netrc` file ###
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in shell command history. You can achieve this using [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
+```
+touch $HOME/.netrc
+chmod a-rwx,u+rw $HOME/.netrc
+```
+After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
+```
+machine <extractor> login <login> password <password>
+```
+For example:
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
+
+On Windows you may also need to setup `%HOME%` environment variable manually.
+
  # OUTPUT TEMPLATE
  
  The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:
@@ -379,7 +401,7 @@ In February 2015, the new YouTube player contained a character sequence in a str
  
  ### HTTP Error 429: Too Many Requests or 402: Payment Required
  
-These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
+These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
  
  ### SyntaxError: Non-ASCII character ###
  
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 220e52b988ace1314ec866f59a8ef34562f1b583..0ca06c71dbcf44d6100a196c2463e7ec00de5f62 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -17,6 +17,7 @@
   - **AcademicEarth:Course**
   - **AddAnime**
   - **AdobeTV**
+ - **AdobeTVVideo**
   - **AdultSwim**
   - **Aftenposten**
   - **Aftonbladet**
@@ -110,6 +111,7 @@
   - **dailymotion**
   - **dailymotion:playlist**
   - **dailymotion:user**
+ - **DailymotionCloud**
   - **daum.net**
   - **DBTV**
   - **DctpTv**
@@ -281,6 +283,7 @@
   - **Motherless**
   - **Motorsport**: motorsport.com
   - **MovieClips**
+ - **MovieFap**
   - **Moviezine**
   - **movshare**: MovShare
   - **MPORA**
@@ -344,6 +347,7 @@
   - **Odnoklassniki**
   - **OktoberfestTV**
   - **on.aol.com**
+ - **OnionStudios**
   - **Ooyala**
   - **OoyalaExternal**
   - **OpenFilm**
@@ -357,6 +361,7 @@
   - **PhilharmonieDeParis**: Philharmonie de Paris
   - **Phoenix**
   - **Photobucket**
+ - **Pinkbike**
   - **Pladform**
   - **PlanetaPlay**
   - **play.fm**
@@ -379,6 +384,7 @@
   - **Pyvideo**
   - **qqmusic**
   - **qqmusic:album**
+ - **qqmusic:playlist**
   - **qqmusic:singer**
   - **qqmusic:toplist**
   - **QuickVid**
@@ -436,6 +442,8 @@
   - **smotri:broadcast**: Smotri.com broadcasts
   - **smotri:community**: Smotri.com community videos
   - **smotri:user**: Smotri.com user videos
+ - **SnagFilms**
+ - **SnagFilmsEmbed**
   - **Snotr**
   - **Sohu**
   - **soompi**
@@ -498,6 +506,7 @@
   - **TheOnion**
   - **ThePlatform**
   - **TheSixtyOne**
+ - **ThisAmericanLife**
   - **ThisAV**
   - **THVideo**
   - **THVideoPlaylist**
@@ -538,6 +547,7 @@
   - **twitch:stream**
   - **twitch:video**
   - **twitch:vod**
+ - **TwitterCard**
   - **Ubu**
   - **udemy**
   - **udemy:course**
@@ -612,6 +622,7 @@
   - **XBef**
   - **XboxClips**
   - **XHamster**
+ - **XHamsterEmbed**
   - **XMinus**
   - **XNXX**
   - **Xstream**
@@ -628,7 +639,7 @@
   - **YesJapan**
   - **Ynet**
   - **YouJizz**
- - **Youku**
+ - **youku**
   - **YouPorn**
   - **YourUpload**
   - **youtube**: YouTube.com
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 6e4b6f56664f67a796b21f10b8d005f2f6e5b68d..00af78e0600f8d2136116e91bcda179f70dbf9a5 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -139,6 +139,7 @@ class YoutubeDL(object):
      outtmpl:           Template for output names.
      restrictfilenames: Do not allow "&" and spaces in file names
      ignoreerrors:      Do not stop on download errors.
+    force_generic_extractor: Force downloader to use the generic extractor
      nooverwrites:      Prevent overwriting files.
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
@@ -261,6 +262,8 @@ class YoutubeDL(object):
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                         otherwise prefer avconv.
+    postprocessor_args: A list of additional command-line arguments for the
+                        postprocessor.
      """
  
      params = None
@@ -626,13 +629,16 @@ class YoutubeDL(object):
              info_dict.setdefault(key, value)
  
      def extract_info(self, url, download=True, ie_key=None, extra_info={},
-                     process=True):
+                     process=True, force_generic_extractor=False):
          '''
          Returns a list with a dictionary for each video we find.
          If 'download', also downloads the videos.
          extra_info is a dict containing the extra values to add to each result
          '''
  
+        if not ie_key and force_generic_extractor:
+            ie_key = 'Generic'
+
          if ie_key:
              ies = [self.get_info_extractor(ie_key)]
          else:
@@ -1004,7 +1010,7 @@ class YoutubeDL(object):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
-                if 'width' in t and 'height' in t:
+                if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
                      t['id'] = '%d' % i
@@ -1493,7 +1499,8 @@ class YoutubeDL(object):
          for url in url_list:
              try:
                  # It also downloads the videos
-                res = self.extract_info(url)
+                res = self.extract_info(
+                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
              except UnavailableVideoError:
                  self.report_error('unable to download video')
              except MaxDownloadsReached:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index ace17857c8cb28320ba1fab2988e56c020583af7..55b22c889f97c73d28e732466f850bcfb1615c83 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -169,7 +169,7 @@ def _real_main(argv=None):
          if not opts.audioquality.isdigit():
              parser.error('invalid audio quality specified')
      if opts.recodevideo is not None:
-        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
+        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
              parser.error('invalid video recode format specified')
      if opts.convertsubtitles is not None:
          if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
@@ -263,6 +263,9 @@ def _real_main(argv=None):
      external_downloader_args = None
      if opts.external_downloader_args:
          external_downloader_args = shlex.split(opts.external_downloader_args)
+    postprocessor_args = None
+    if opts.postprocessor_args:
+        postprocessor_args = shlex.split(opts.postprocessor_args)
      match_filter = (
          None if opts.match_filter is None
          else match_filter_func(opts.match_filter))
@@ -293,6 +296,7 @@ def _real_main(argv=None):
          'autonumber_size': opts.autonumber_size,
          'restrictfilenames': opts.restrictfilenames,
          'ignoreerrors': opts.ignoreerrors,
+        'force_generic_extractor': opts.force_generic_extractor,
          'ratelimit': opts.ratelimit,
          'nooverwrites': opts.nooverwrites,
          'retries': opts_retries,
@@ -366,6 +370,7 @@ def _real_main(argv=None):
          'ffmpeg_location': opts.ffmpeg_location,
          'hls_prefer_native': opts.hls_prefer_native,
          'external_downloader_args': external_downloader_args,
+        'postprocessor_args': postprocessor_args,
          'cn_verification_proxy': opts.cn_verification_proxy,
      }
  
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index f9529210dd955932eca837aa7022696470c557ed..c3783337a5a801cd5230f2ae2f5d82112d4b6cea 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -9,6 +9,7 @@ import shutil
  import socket
  import subprocess
  import sys
+import itertools
  
  
  try:
@@ -388,6 +389,15 @@ else:
              pass
          return _terminal_size(columns, lines)
  
+try:
+    itertools.count(start=0, step=1)
+    compat_itertools_count = itertools.count
+except TypeError:  # Python 2.6
+    def compat_itertools_count(start=0, step=1):
+        n = start
+        while True:
+            yield n
+            n += step
  
  __all__ = [
      'compat_HTTPError',
@@ -401,6 +411,7 @@ __all__ = [
      'compat_html_entities',
      'compat_http_client',
      'compat_http_server',
+    'compat_itertools_count',
      'compat_kwargs',
      'compat_ord',
      'compat_parse_qs',
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py

index 7ca2d314348400fc6ba3095e23d69e1c925ad7dd..1d5cc99043d02f658064e688c268c37171c37325 100644 (file)
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -109,6 +109,14 @@ class Aria2cFD(ExternalFD):
          cmd += ['--', info_dict['url']]
          return cmd
  
+
+class HttpieFD(ExternalFD):
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+        for key, val in info_dict['http_headers'].items():
+            cmd += ['%s:%s' % (key, val)]
+        return cmd
+
  _BY_NAME = dict(
      (klass.get_basename(), klass)
      for name, klass in globals().items()
@@ -123,5 +131,6 @@ def list_external_downloaders():
  def get_external_downloader(external_downloader):
      """ Given the name of the executable, see whether we support the given
          downloader . """
-    bn = os.path.basename(external_downloader)
+    # Drop .exe extension on Windows
+    bn = os.path.splitext(os.path.basename(external_downloader))[0]
      return _BY_NAME[bn]
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index dc1a302e69c13f00d67230b2920ce50106ea2629..06f21064b699fe8b3012836949c655e6e284ef29 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -144,7 +144,6 @@ from .ellentv import (
  )
  from .elpais import ElPaisIE
  from .embedly import EmbedlyIE
-from .empflix import EMPFlixIE
  from .engadget import EngadgetIE
  from .eporner import EpornerIE
  from .eroprofile import EroProfileIE
@@ -261,6 +260,14 @@ from .keek import KeekIE
  from .kontrtube import KontrTubeIE
  from .krasview import KrasViewIE
  from .ku6 import Ku6IE
+from .kuwo import (
+    KuwoIE,
+    KuwoAlbumIE,
+    KuwoChartIE,
+    KuwoSingerIE,
+    KuwoCategoryIE,
+    KuwoMvIE,
+)
  from .la7 import LA7IE
  from .laola1tv import Laola1TvIE
  from .letv import (
@@ -324,6 +331,7 @@ from .musicvault import MusicVaultIE
  from .muzu import MuzuTVIE
  from .myspace import MySpaceIE, MySpaceAlbumIE
  from .myspass import MySpassIE
+from .myvi import MyviIE
  from .myvideo import MyVideoIE
  from .myvidster import MyVidsterIE
  from .nationalgeographic import NationalGeographicIE
@@ -343,6 +351,15 @@ from .ndtv import NDTVIE
  from .netzkino import NetzkinoIE
  from .nerdcubed import NerdCubedFeedIE
  from .nerdist import NerdistIE
+from .neteasemusic import (
+    NetEaseMusicIE,
+    NetEaseMusicAlbumIE,
+    NetEaseMusicSingerIE,
+    NetEaseMusicListIE,
+    NetEaseMusicMvIE,
+    NetEaseMusicProgramIE,
+    NetEaseMusicDjRadioIE,
+)
  from .newgrounds import NewgroundsIE
  from .newstube import NewstubeIE
  from .nextmedia import (
@@ -372,7 +389,8 @@ from .npo import (
      NPOLiveIE,
      NPORadioIE,
      NPORadioFragmentIE,
-    TegenlichtVproIE,
+    VPROIE,
+    WNLIE
  )
  from .nrk import (
      NRKIE,
@@ -388,6 +406,7 @@ from .nytimes import (
  from .nuvid import NuvidIE
  from .odnoklassniki import OdnoklassnikiIE
  from .oktoberfesttv import OktoberfestTVIE
+from .onionstudios import OnionStudiosIE
  from .ooyala import (
      OoyalaIE,
      OoyalaExternalIE,
@@ -432,6 +451,7 @@ from .qqmusic import (
      QQMusicSingerIE,
      QQMusicAlbumIE,
      QQMusicToplistIE,
+    QQMusicPlaylistIE,
  )
  from .quickvid import QuickVidIE
  from .r7 import R7IE
@@ -441,6 +461,7 @@ from .radiobremen import RadioBremenIE
  from .radiofrance import RadioFranceIE
  from .rai import RaiIE
  from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
  from .redtube import RedTubeIE
  from .restudy import RestudyIE
  from .reverbnation import ReverbNationIE
@@ -492,6 +513,10 @@ from .smotri import (
      SmotriUserIE,
      SmotriBroadcastIE,
  )
+from .snagfilms import (
+    SnagFilmsIE,
+    SnagFilmsEmbedIE,
+)
  from .snotr import SnotrIE
  from .sohu import SohuIE
  from .soompi import (
@@ -565,6 +590,7 @@ from .tf1 import TF1IE
  from .theonion import TheOnionIE
  from .theplatform import ThePlatformIE
  from .thesixtyone import TheSixtyOneIE
+from .thisamericanlife import ThisAmericanLifeIE
  from .thisav import ThisAVIE
  from .tinypic import TinyPicIE
  from .tlc import TlcIE, TlcDeIE
@@ -572,7 +598,11 @@ from .tmz import (
      TMZIE,
      TMZArticleIE,
  )
-from .tnaflix import TNAFlixIE
+from .tnaflix import (
+    TNAFlixIE,
+    EMPFlixIE,
+    MovieFapIE,
+)
  from .thvideo import (
      THVideoIE,
      THVideoPlaylistIE
@@ -616,6 +646,7 @@ from .twitch import (
      TwitchBookmarksIE,
      TwitchStreamIE,
  )
+from .twitter import TwitterCardIE
  from .ubu import UbuIE
  from .udemy import (
      UdemyIE,
@@ -692,7 +723,10 @@ from .wdr import (
      WDRMobileIE,
      WDRMausIE,
  )
-from .webofstories import WebOfStoriesIE
+from .webofstories import (
+    WebOfStoriesIE,
+    WebOfStoriesPlaylistIE,
+)
  from .weibo import WeiboIE
  from .wimp import WimpIE
  from .wistia import WistiaIE
@@ -723,6 +757,7 @@ from .yandexmusic import (
      YandexMusicPlaylistIE,
  )
  from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
  from .ynet import YnetIE
  from .youjizz import YouJizzIE
  from .youku import YoukuIE
diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py

index 906895c1e197f510c358e47376c6f50d81bd8bdf..e37ee44403a34afe03c02660a71765e2af89cdba 100644 (file)
--- a/youtube_dl/extractor/baidu.py
+++ b/youtube_dl/extractor/baidu.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
  
  
  class BaiduVideoIE(InfoExtractor):
+    IE_DESC = '百度视频'
      _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
      _TESTS = [{
          'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index d768f99e67bd49de31e3f0df70d832284cbb3482..4721c22930f15cb51d0daaac294eeeca3a329092 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -13,6 +13,7 @@ from ..compat import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
+    compat_xml_parse_error,
  )
  from ..utils import (
      determine_ext,
@@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
  
          try:
              object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
-        except xml.etree.ElementTree.ParseError:
+        except compat_xml_parse_error:
              return
  
          fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py

index d07d544eaf7742bb782a8b367a09561f14c44a4e..8306d6fb7d0d4414cff36f7b381ca9c877820f58 100644 (file)
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -1,7 +1,5 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..utils import (
      find_xpath_attr,
@@ -10,9 +8,9 @@ from ..utils import (
  
  
  class ClipsyndicateIE(InfoExtractor):
-    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+    _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
          'md5': '4d7d549451bad625e0ff3d7bd56d776c',
          'info_dict': {
@@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):
              'duration': 612,
              'thumbnail': 're:^https?://.+\.jpg',
          },
-    }
+    }, {
+        'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
          js_player = self._download_webpage(
              'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
              video_id, 'Downlaoding player')
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 49e4dc7109e151ae124ed1aac15a9762d00eac21..5a2d0d995c75cbbc94709bb8669928de84f46a29 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -22,18 +22,20 @@ from ..compat import (
      compat_str,
  )
  from ..utils import (
+    NO_DEFAULT,
      age_restricted,
      bug_reports_message,
      clean_html,
      compiled_regex_type,
+    determine_ext,
      ExtractorError,
+    fix_xml_ampersands,
      float_or_none,
      int_or_none,
      RegexNotFoundError,
      sanitize_filename,
      unescapeHTML,
  )
-_NO_DEFAULT = object()
  
  
  class InfoExtractor(object):
@@ -523,7 +525,7 @@ class InfoExtractor(object):
              video_info['description'] = playlist_description
          return video_info
  
-    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+    def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
          """
          Perform a regex search on the given string, using a single or a list of
          patterns returning the first matching group.
@@ -549,7 +551,7 @@ class InfoExtractor(object):
                  return next(g for g in mobj.groups() if g is not None)
              else:
                  return mobj.group(group)
-        elif default is not _NO_DEFAULT:
+        elif default is not NO_DEFAULT:
              return default
          elif fatal:
              raise RegexNotFoundError('Unable to extract %s' % _name)
@@ -557,7 +559,7 @@ class InfoExtractor(object):
              self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
              return None
  
-    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+    def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
          """
          Like _search_regex, but strips HTML tags and unescapes entities.
          """
@@ -705,6 +707,25 @@ class InfoExtractor(object):
          return self._html_search_meta('twitter:player', html,
                                        'twitter card player')
  
+    @staticmethod
+    def _hidden_inputs(html):
+        return dict([
+            (input.group('name'), input.group('value')) for input in re.finditer(
+                r'''(?x)
+                    <input\s+
+                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+                ''', html)
+        ])
+
+    def _form_hidden_inputs(self, form_id, html):
+        form = self._search_regex(
+            r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+            html, '%s form' % form_id, group='form')
+        return self._hidden_inputs(form)
+
      def _sort_formats(self, formats, field_preference=None):
          if not formats:
              raise ExtractorError('No video formats found')
@@ -815,10 +836,14 @@ class InfoExtractor(object):
          self.to_screen(msg)
          time.sleep(timeout)
  
-    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
+    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+                             transform_source=lambda s: fix_xml_ampersands(s).strip()):
          manifest = self._download_xml(
              manifest_url, video_id, 'Downloading f4m manifest',
-            'Unable to download f4m manifest')
+            'Unable to download f4m manifest',
+            # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+            # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+            transform_source=transform_source)
  
          formats = []
          manifest_version = '1.0'
@@ -828,8 +853,19 @@ class InfoExtractor(object):
              media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
          for i, media_el in enumerate(media_nodes):
              if manifest_version == '2.0':
-                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
-                                (media_el.attrib.get('href') or media_el.attrib.get('url')))
+                media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
+                if not media_url:
+                    continue
+                manifest_url = (
+                    media_url if media_url.startswith('http://') or media_url.startswith('https://')
+                    else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+                # If media_url is itself a f4m manifest do the recursive extraction
+                # since bitrates in parent manifest (this one) and media_url manifest
+                # may differ leading to inability to resolve the format by requested
+                # bitrate in f4m downloader
+                if determine_ext(manifest_url) == 'f4m':
+                    formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
+                    continue
              tbr = int_or_none(media_el.attrib.get('bitrate'))
              formats.append({
                  'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
@@ -846,7 +882,8 @@ class InfoExtractor(object):
  
      def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                                entry_protocol='m3u8', preference=None,
-                              m3u8_id=None, note=None, errnote=None):
+                              m3u8_id=None, note=None, errnote=None,
+                              fatal=True):
  
          formats = [{
              'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -866,7 +903,10 @@ class InfoExtractor(object):
          m3u8_doc = self._download_webpage(
              m3u8_url, video_id,
              note=note or 'Downloading m3u8 information',
-            errnote=errnote or 'Failed to download m3u8 information')
+            errnote=errnote or 'Failed to download m3u8 information',
+            fatal=fatal)
+        if m3u8_doc is False:
+            return m3u8_doc
          last_info = None
          last_media = None
          kv_rex = re.compile(
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py

index 41f0c736d98c229518bacb41fac2f35ce9b80958..73f1e22efdc5040d55042fdc1eb47a78c4e56468 100644 (file)
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -27,7 +27,7 @@ from ..aes import (
  
  
  class CrunchyrollIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
      _NETRC_MACHINE = 'crunchyroll'
      _TESTS = [{
          'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -45,6 +45,22 @@ class CrunchyrollIE(InfoExtractor):
              # rtmp
              'skip_download': True,
          },
+    }, {
+        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
+        'info_dict': {
+            'id': '589804',
+            'ext': 'flv',
+            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
+            'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Danny Choo Network',
+            'upload_date': '20120213',
+        },
+        'params': {
+            # rtmp
+            'skip_download': True,
+        },
+
      }, {
          'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
          'only_matching': True,
@@ -251,16 +267,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
          for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
              stream_quality, stream_format = self._FORMAT_IDS[fmt]
              video_format = fmt + 'p'
-            streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
-            # urlencode doesn't work!
-            streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
+            streamdata_req = compat_urllib_request.Request(
+                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
+                % (stream_id, stream_format, stream_quality),
+                compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
              streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
              streamdata = self._download_xml(
                  streamdata_req, video_id,
                  note='Downloading media info for %s' % video_format)
-            video_url = streamdata.find('./host').text
-            video_play_path = streamdata.find('./file').text
+            stream_info = streamdata.find('./{default}preload/stream_info')
+            video_url = stream_info.find('./host').text
+            video_play_path = stream_info.find('./file').text
              formats.append({
                  'url': video_url,
                  'play_path': video_play_path,
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py

index 0226f8036c81d97246c87f2308614d84202ae540..45049bf371370da6e4b64952441e76d86814fd6a 100644 (file)
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601, ExtractorError
  
  
  class CtsNewsIE(InfoExtractor):
+    IE_DESC = '華視新聞'
      # https connection failed (Connection reset)
      _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
      _TESTS = [{
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py

index 96f0ed9ad19756b3d380d8023af6c61bee1c18a9..1a41c0db181c43c9c35d24988b9fd118c248f399 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -53,6 +53,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                  'uploader': 'IGN',
                  'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
                  'upload_date': '20150306',
+                'duration': 74,
              }
          },
          # Vevo video
@@ -164,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
              'thumbnail': info['thumbnail_url'],
              'age_limit': age_limit,
              'view_count': view_count,
+            'duration': info['duration']
          }
  
      def _get_subtitles(self, video_id, webpage):
@@ -254,22 +256,30 @@ class DailymotionUserIE(DailymotionPlaylistIE):
  
  
  class DailymotionCloudIE(DailymotionBaseInfoExtractor):
-    _VALID_URL = r'http://api\.dmcloud\.net/embed/[^/]+/(?P<id>[^/?]+)'
+    _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
+    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
+    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
  
-    _TEST = {
+    _TESTS = [{
          # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
          # Tested at FranceTvInfo_2
          'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
          'only_matching': True,
-    }
+    }, {
+        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
+        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
+        'only_matching': True,
+    }]
  
      @classmethod
      def _extract_dmcloud_url(self, webpage):
-        mobj = re.search(r'<iframe[^>]+src=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]', webpage)
+        mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage)
          if mobj:
              return mobj.group(1)
  
-        mobj = re.search(r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]', webpage)
+        mobj = re.search(
+            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL,
+            webpage)
          if mobj:
              return mobj.group(1)
  
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py

index 8049779b0a31049f704bae256a3752a9a22ad789..263532cc6e66a94c79670caa5e1600444ce909da 100644 (file)
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import unified_strdate
  
  
  class DFBIE(InfoExtractor):
      IE_NAME = 'tv.dfb.de'
-    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
  
      _TEST = {
-        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+        'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
          # The md5 is different each time
          'info_dict': {
-            'id': '9070',
+            'id': '11633',
+            'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
              'ext': 'flv',
-            'title': 'Highlights des Empfangs in Berlin',
-            'upload_date': '20140716',
+            'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
+            'upload_date': '20150714',
          },
      }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
  
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)
          player_info = self._download_xml(
              'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
-            video_id)
+            display_id)
          video_info = player_info.find('video')
  
-        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
+        f4m_info = self._download_xml(
+            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
          token_el = f4m_info.find('token')
          manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
+        formats = self._extract_f4m_formats(manifest_url, display_id)
  
          return {
              'id': video_id,
+            'display_id': display_id,
              'title': video_info.find('title').text,
-            'url': manifest_url,
-            'ext': 'flv',
              'thumbnail': self._og_search_thumbnail(webpage),
-            'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
+            'upload_date': unified_strdate(video_info.find('time_date').text),
+            'formats': formats,
          }
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py

index 479430c51072ab91e976df4d459af372c5608cdd..373b3b4b4735d8544128c48a10037eed3c570e5d 100644 (file)
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dl/extractor/douyutv.py
@@ -9,6 +9,7 @@ from ..compat import (compat_str, compat_basestring)
  
  
  class DouyuTVIE(InfoExtractor):
+    IE_DESC = '斗鱼'
      _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
      _TESTS = [{
          'url': 'http://www.douyutv.com/iseven',
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py

index ca41a3abf324a9c2237c02047f7384f5ecc9cff6..38e6597c80f203b30a90a13c92027a4a5a305bd7 100644 (file)
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -23,8 +23,23 @@ class DramaFeverBaseIE(InfoExtractor):
      _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
      _NETRC_MACHINE = 'dramafever'
  
+    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
+
+    _consumer_secret = None
+
+    def _get_consumer_secret(self):
+        mainjs = self._download_webpage(
+            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
+            None, 'Downloading main.js', fatal=False)
+        if not mainjs:
+            return self._CONSUMER_SECRET
+        return self._search_regex(
+            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
+            'consumer secret', default=self._CONSUMER_SECRET)
+
      def _real_initialize(self):
          self._login()
+        self._consumer_secret = self._get_consumer_secret()
  
      def _login(self):
          (username, password) = self._get_login_info()
@@ -119,6 +134,23 @@ class DramaFeverIE(DramaFeverBaseIE):
                  'url': href,
              }]
  
+        series_id, episode_number = video_id.split('.')
+        episode_info = self._download_json(
+            # We only need a single episode info, so restricting page size to one episode
+            # and dealing with page number as with episode number
+            r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
+            % (self._consumer_secret, series_id, episode_number),
+            video_id, 'Downloading episode info JSON', fatal=False)
+        if episode_info:
+            value = episode_info.get('value')
+            if value:
+                subfile = value[0].get('subfile') or value[0].get('new_subfile')
+                if subfile and subfile != 'http://www.dramafever.com/st/':
+                    subtitles.setdefault('English', []).append({
+                        'ext': 'srt',
+                        'url': subfile,
+                    })
+
          return {
              'id': video_id,
              'title': title,
@@ -152,27 +184,14 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
          'playlist_count': 20,
      }]
  
-    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
      _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
  
-    def _get_consumer_secret(self, video_id):
-        mainjs = self._download_webpage(
-            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
-            video_id, 'Downloading main.js', fatal=False)
-        if not mainjs:
-            return self._CONSUMER_SECRET
-        return self._search_regex(
-            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
-            'consumer secret', default=self._CONSUMER_SECRET)
-
      def _real_extract(self, url):
          series_id = self._match_id(url)
  
-        consumer_secret = self._get_consumer_secret(series_id)
-
          series = self._download_json(
              'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
-            % (consumer_secret, series_id),
+            % (self._consumer_secret, series_id),
              series_id, 'Downloading series JSON')['series'][series_id]
  
          title = clean_html(series['name'])
@@ -182,7 +201,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE):
          for page_num in itertools.count(1):
              episodes = self._download_json(
                  'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
-                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
+                % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
                  series_id, 'Downloading episodes JSON page #%d' % page_num)
              for episode in episodes.get('value', []):
                  episode_url = episode.get('episode_url')
diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py

index 7626219baf33522958960bca0d8babe67b8a332e..8b98b013adeee32c67c769acfb88d76edff9a1f7 100644 (file)
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dl/extractor/drbonanza.py
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
  
      _TESTS = [{
          'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
-        'md5': 'fe330252ddea607635cf2eb2c99a0af3',
          'info_dict': {
              'id': '65517',
              'ext': 'mp4',
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
              'upload_date': '20110120',
              'duration': 3664,
          },
+        'params': {
+            'skip_download': True,  # requires rtmp
+        },
      }, {
          'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
          'md5': '6dfe039417e76795fb783c52da3de11d',
@@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
                          'format_id': file['Type'].replace('Video', ''),
                          'preference': preferencemap.get(file['Type'], -10),
                      })
+                    if format['url'].startswith('rtmp'):
+                        rtmp_url = format['url']
+                        format['rtmp_live'] = True  # --resume does not work
+                        if '/bonanza/' in rtmp_url:
+                            format['play_path'] = rtmp_url.split('/bonanza/')[1]
                      formats.append(format)
                  elif file['Type'] == "Thumb":
                      thumbnail = file['Location']
@@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
          description = '%s\n%s\n%s\n' % (
              info['Description'], info['Actors'], info['Colophon'])
  
-        for f in formats:
-            f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
-            f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
          self._sort_formats(formats)
  
          display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py

index 37c5c181f799efd8ee69d850c0b6076130c64073..639f9182c5484a22f0056e25fc6aa7e56f193df5 100644 (file)
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -36,25 +36,24 @@ class DrTuberIE(InfoExtractor):
              r'<source src="([^"]+)"', webpage, 'video URL')
  
          title = self._html_search_regex(
-            [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
+            [r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
              webpage, 'title')
  
          thumbnail = self._html_search_regex(
              r'poster="([^"]+)"',
              webpage, 'thumbnail', fatal=False)
  
-        like_count = str_to_int(self._html_search_regex(
-            r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
-            webpage, 'like count', fatal=False))
-        dislike_count = str_to_int(self._html_search_regex(
-            r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
-            webpage, 'like count', fatal=False))
-        comment_count = str_to_int(self._html_search_regex(
-            r'<span class="comments_count">([\d,\.]+)</span>',
-            webpage, 'comment count', fatal=False))
+        def extract_count(id_, name):
+            return str_to_int(self._html_search_regex(
+                r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
+                webpage, '%s count' % name, fatal=False))
+
+        like_count = extract_count('rate_likes', 'like')
+        dislike_count = extract_count('rate_dislikes', 'dislike')
+        comment_count = extract_count('comments_count', 'comment')
  
          cats_str = self._search_regex(
-            r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
+            r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
          categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
  
          return {
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py

deleted file mode 100644 (file)

index 4827022..0000000
--- a/youtube_dl/extractor/empflix.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import unicode_literals
-
-from .tnaflix import TNAFlixIE
-
-
-class EMPFlixIE(TNAFlixIE):
-    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
-
-    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
-    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
-    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
-
-    _TESTS = [
-        {
-            'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-            'md5': 'b1bc15b6412d33902d6e5952035fcabc',
-            'info_dict': {
-                'id': '33051',
-                'display_id': 'Amateur-Finger-Fuck',
-                'ext': 'mp4',
-                'title': 'Amateur Finger Fuck',
-                'description': 'Amateur solo finger fucking.',
-                'thumbnail': 're:https?://.*\.jpg$',
-                'age_limit': 18,
-            }
-        },
-        {
-            'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
-            'only_matching': True,
-        }
-    ]
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 5c03fddc6a2ee548a6617a3aea9ba161f6b3777d..392ad36486c8e953f4d3bed341353a3b5d11e65a 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -37,6 +37,7 @@ from .rutv import RUTVIE
  from .tvc import TVCIE
  from .sportbox import SportBoxEmbedIE
  from .smotri import SmotriIE
+from .myvi import MyviIE
  from .condenast import CondeNastIE
  from .udn import UDNEmbedIE
  from .senateisvp import SenateISVPIE
@@ -46,6 +47,8 @@ from .pornhub import PornHubIE
  from .xhamster import XHamsterEmbedIE
  from .vimeo import VimeoIE
  from .dailymotion import DailymotionCloudIE
+from .onionstudios import OnionStudiosIE
+from .snagfilms import SnagFilmsEmbedIE
  
  
  class GenericIE(InfoExtractor):
@@ -336,6 +339,17 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        # Myvi.ru embed
+        {
+            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+            'info_dict': {
+                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+                'ext': 'mp4',
+                'title': 'Ужастики, русский трейлер (2015)',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 153,
+            }
+        },
          # XHamster embed
          {
              'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
@@ -667,6 +681,18 @@ class GenericIE(InfoExtractor):
                  'title': 'John Carlson Postgame 2/25/15',
              },
          },
+        # Kaltura embed (different embed code)
+        {
+            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+            'info_dict': {
+                'id': '1_a52wc67y',
+                'ext': 'flv',
+                'upload_date': '20150127',
+                'uploader_id': 'PremierMedia',
+                'timestamp': int,
+                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+            },
+        },
          # Eagle.Platform embed (generic URL)
          {
              'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -836,6 +862,27 @@ class GenericIE(InfoExtractor):
                  'thumbnail': 're:^https?://.*\.jpe?g$',
              }
          },
+        # OnionStudios embed
+        {
+            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+            'info_dict': {
+                'id': '2855',
+                'ext': 'mp4',
+                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+                'thumbnail': 're:^https?://.*\.jpe?g$',
+                'uploader': 'ClickHole',
+                'uploader_id': 'clickhole',
+            }
+        },
+        # SnagFilms embed
+        {
+            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+            'info_dict': {
+                'id': '74849a00-85a9-11e1-9660-123139220831',
+                'ext': 'mp4',
+                'title': '#whilewewatch',
+            }
+        },
          # AdobeTVVideo embed
          {
              'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
@@ -1014,7 +1061,9 @@ class GenericIE(InfoExtractor):
              }
  
          if not self._downloader.params.get('test', False) and not is_intentional:
-            self._downloader.report_warning('Falling back on generic information extractor.')
+            force = self._downloader.params.get('force_generic_extractor', False)
+            self._downloader.report_warning(
+                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
  
          if not full_response:
              request = compat_urllib_request.Request(url)
@@ -1388,6 +1437,11 @@ class GenericIE(InfoExtractor):
          if smotri_url:
              return self.url_result(smotri_url, 'Smotri')
  
+        # Look for embedded Myvi.ru player
+        myvi_url = MyviIE._extract_url(webpage)
+        if myvi_url:
+            return self.url_result(myvi_url)
+
          # Look for embeded soundcloud player
          mobj = re.search(
              r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
@@ -1467,8 +1521,8 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'), 'Zapiks')
  
          # Look for Kaltura embeds
-        mobj = re.search(
-            r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
+        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
+                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
          if mobj is not None:
              return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
  
@@ -1530,6 +1584,16 @@ class GenericIE(InfoExtractor):
          if dmcloud_url:
              return self.url_result(dmcloud_url, 'DailymotionCloud')
  
+        # Look for OnionStudios embeds
+        onionstudios_url = OnionStudiosIE._extract_url(webpage)
+        if onionstudios_url:
+            return self.url_result(onionstudios_url)
+
+        # Look for SnagFilms embeds
+        snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
+        if snagfilms_url:
+            return self.url_result(snagfilms_url)
+
          # Look for AdobeTVVideo embeds
          mobj = re.search(
              r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py

index 397f1d42eea83b774791b6a53ce087f60a5dcca1..884700c52b90b53fdc8f581378d28611d7c36f33 100644 (file)
--- a/youtube_dl/extractor/gfycat.py
+++ b/youtube_dl/extractor/gfycat.py
@@ -6,12 +6,13 @@ from ..utils import (
      int_or_none,
      float_or_none,
      qualities,
+    ExtractorError,
  )
  
  
  class GfycatIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
+    _TESTS = [{
          'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
          'info_dict': {
              'id': 'DeadlyDecisiveGermanpinscher',
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
              'categories': list,
              'age_limit': 0,
          }
-    }
+    }, {
+        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
+        'info_dict': {
+            'id': 'JauntyTimelyAmazontreeboa',
+            'ext': 'mp4',
+            'title': 'JauntyTimelyAmazontreeboa',
+            'timestamp': 1411720126,
+            'upload_date': '20140926',
+            'uploader': 'anonymous',
+            'duration': 3.52,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'categories': list,
+            'age_limit': 0,
+        }
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
          gfy = self._download_json(
              'http://gfycat.com/cajax/get/%s' % video_id,
-            video_id, 'Downloading video info')['gfyItem']
+            video_id, 'Downloading video info')
+        if 'error' in gfy:
+            raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
+        gfy = gfy['gfyItem']
  
          title = gfy.get('title') or gfy['gfyName']
          description = gfy.get('description')
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py

index 6147596e4c5d082d54f975b7428a400679b9dc32..f006f0cb105dc9d7b0c1f495dbcd4c840597858a 100644 (file)
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):
          if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
              raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            (?:id="[^"]+"\s+)?
-            value="([^"]*)"
-            ''', webpage))
+        fields = self._hidden_inputs(webpage)
  
          if fields['op'] == 'download1':
              countdown = int_or_none(self._search_regex(
diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py

index 63d87b74cc2d5258960fce3b0c6cd5eca48a0b11..f5aa73d18b47ff225b7e7e332051b97583ca8237 100644 (file)
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@@ -1,7 +1,5 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  
  
@@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor):
      }
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
  
          webpage = self._download_webpage(url, video_id)
  
          title = self._html_search_regex(
-            r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
+            r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
              webpage, 'title')
          wrap_url = self._html_search_regex(
-            r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
+            r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
          wrap_webpage = self._download_webpage(wrap_url, video_id)
  
          video_url = self._html_search_regex(
-            r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
+            r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
  
          return {
              'id': video_id,
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py

index 704d0285d3e1c2ce10e8f3929543c6c66b0fd58a..a3154cfdeccf9b4c18cf5a5b01f7944243fb1509 100644 (file)
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):
              r'<img src="([^"]+)".+?class="pic"',
              webpage, 'thumbnail', fatal=False)
  
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            value="([^"]*)"
-            ''', webpage))
+        fields = self._hidden_inputs(webpage)
  
          request = compat_urllib_request.Request(url, urlencode_postdata(fields))
          request.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py

index 3f7d6666c0810e545c0f285dcf70689806c4dac7..16677f179ecd77040e8fdc42bfb9e4095aa1774a 100644 (file)
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -1,8 +1,7 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
+from ..utils import parse_iso8601
  
  
  class HowcastIE(InfoExtractor):
@@ -13,29 +12,31 @@ class HowcastIE(InfoExtractor):
          'info_dict': {
              'id': '390161',
              'ext': 'mp4',
-            'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
              'title': 'How to Tie a Square Knot Properly',
-        }
+            'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
+            'timestamp': 1276081287,
+            'upload_date': '20100609',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
      }
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
  
-        video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
-        self.report_extraction(video_id)
-
-        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
-                                       webpage, 'video URL')
-
-        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
-                                                    webpage, 'description', fatal=False)
+        embed_code = self._search_regex(
+            r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
+            webpage, 'ooyala embed code')
  
          return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % embed_code,
              'id': video_id,
-            'url': video_url,
-            'title': self._og_search_title(webpage),
-            'description': video_description,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'timestamp': parse_iso8601(self._html_search_meta(
+                'article:published_time', webpage, 'timestamp')),
          }
diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py

index 0847074eeb0f0f258db079495c7f30777b2d1838..65712abc28c3cc68cab7052ab709b2c1e6500cb5 100644 (file)
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
  
  
  class InaIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
      _TEST = {
          'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
          'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py

index f25f43664e262b25473557c5f11dae91e697e3f6..91a1b3ccb70d41027d7ecfa41f1b606ead8f056a 100644 (file)
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -5,13 +5,14 @@ import base64
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
+    compat_urlparse,
  )
  
  
  class InfoQIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
+    _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
          'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
          'info_dict': {
@@ -20,7 +21,10 @@ class InfoQIE(InfoExtractor):
              'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
              'title': 'A Few of My Favorite [Python] Things',
          },
-    }
+    }, {
+        'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -42,7 +46,7 @@ class InfoQIE(InfoExtractor):
          video_id, extension = video_filename.split('.')
  
          http_base = self._search_regex(
-            r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
+            r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
              'HTTP base URL')
  
          formats = [{
@@ -52,7 +56,7 @@ class InfoQIE(InfoExtractor):
              'play_path': playpath,
          }, {
              'format_id': 'http',
-            'url': http_base + real_id,
+            'url': compat_urlparse.urljoin(url, http_base) + real_id,
          }]
          self._sort_formats(formats)
  
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py

index 9106dd07491c18b6521ad3894d16ea56238aa9aa..0f6707d7cc7a9c3ab563ced28e8d15a23c8a97eb 100644 (file)
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -20,6 +20,7 @@ from ..utils import (
  
  class IqiyiIE(InfoExtractor):
      IE_NAME = 'iqiyi'
+    IE_DESC = '爱奇艺'
  
      _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
  
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py

index d0720ff561c16e8c0816c5ff7ab333e54c297dbc..1df084d87ae4c712d9bcfa1aac6d6367641287b5 100644 (file)
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -8,9 +8,9 @@ from .common import InfoExtractor
  
  
  class JeuxVideoIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
+    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
          'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
          'info_dict': {
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
              'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
              'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
          },
-    }
+    }, {
+        'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py

new file mode 100644 (file)

index 0000000..1077846
--- /dev/null
+++ b/youtube_dl/extractor/kuwo.py
@@ -0,0 +1,314 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_id,
+    clean_html,
+    ExtractorError,
+    remove_start,
+)
+
+
+class KuwoBaseIE(InfoExtractor):
+    _FORMATS = [
+        {'format': 'ape', 'ext': 'ape', 'preference': 100},
+        {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
+        {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
+        {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
+        {'format': 'wma', 'ext': 'wma', 'preference': 20},
+        {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
+    ]
+
+    def _get_formats(self, song_id):
+        formats = []
+        for file_format in self._FORMATS:
+            song_url = self._download_webpage(
+                'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
+                (file_format['ext'], file_format.get('br', ''), song_id),
+                song_id, note='Download %s url info' % file_format['format'],
+            )
+            if song_url.startswith('http://') or song_url.startswith('https://'):
+                formats.append({
+                    'url': song_url,
+                    'format_id': file_format['format'],
+                    'format': file_format['format'],
+                    'preference': file_format['preference'],
+                    'abr': file_format.get('abr'),
+                })
+        self._sort_formats(formats)
+        return formats
+
+
+class KuwoIE(KuwoBaseIE):
+    IE_NAME = 'kuwo:song'
+    IE_DESC = '酷我音乐'
+    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
+    _TESTS = [{
+        'url': 'http://www.kuwo.cn/yinyue/635632/',
+        'info_dict': {
+            'id': '635632',
+            'ext': 'ape',
+            'title': '爱我别走',
+            'creator': '张震岳',
+            'upload_date': '20080122',
+            'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
+        },
+    }, {
+        'url': 'http://www.kuwo.cn/yinyue/6446136/',
+        'info_dict': {
+            'id': '6446136',
+            'ext': 'mp3',
+            'title': '心',
+            'creator': 'IU',
+            'upload_date': '20150518',
+        },
+        'params': {
+            'format': 'mp3-320'
+        },
+    }]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, song_id, note='Download song detail info',
+            errnote='Unable to get song detail info')
+
+        song_name = self._html_search_regex(
+            r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
+        singer_name = self._html_search_regex(
+            r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
+            webpage, 'singer name', fatal=False)
+        lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
+        if lrc_content == '暂无':     # indicates no lyrics
+            lrc_content = None
+
+        formats = self._get_formats(song_id)
+
+        album_id = self._html_search_regex(
+            r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
+            webpage, 'album id', fatal=False)
+
+        publish_time = None
+        if album_id is not None:
+            album_info_page = self._download_webpage(
+                'http://www.kuwo.cn/album/%s/' % album_id, song_id,
+                note='Download album detail info',
+                errnote='Unable to get album detail info')
+
+            publish_time = self._html_search_regex(
+                r'发行时间：(\d{4}-\d{2}-\d{2})', album_info_page,
+                'publish time', fatal=False)
+            if publish_time:
+                publish_time = publish_time.replace('-', '')
+
+        return {
+            'id': song_id,
+            'title': song_name,
+            'creator': singer_name,
+            'upload_date': publish_time,
+            'description': lrc_content,
+            'formats': formats,
+        }
+
+
+class KuwoAlbumIE(InfoExtractor):
+    IE_NAME = 'kuwo:album'
+    IE_DESC = '酷我音乐 - 专辑'
+    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
+    _TEST = {
+        'url': 'http://www.kuwo.cn/album/502294/',
+        'info_dict': {
+            'id': '502294',
+            'title': 'M',
+            'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
+        },
+        'playlist_count': 2,
+    }
+
+    def _real_extract(self, url):
+        album_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            url, album_id, note='Download album info',
+            errnote='Unable to get album info')
+
+        album_name = self._html_search_regex(
+            r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage,
+            'album name')
+        album_intro = remove_start(
+            clean_html(get_element_by_id('intro', webpage)),
+            '%s简介：' % album_name)
+
+        entries = [
+            self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+                r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
+                webpage)
+        ]
+        return self.playlist_result(entries, album_id, album_name, album_intro)
+
+
+class KuwoChartIE(InfoExtractor):
+    IE_NAME = 'kuwo:chart'
+    IE_DESC = '酷我音乐 - 排行榜'
+    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
+    _TEST = {
+        'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
+        'info_dict': {
+            'id': '香港中文龙虎榜',
+            'title': '香港中文龙虎榜',
+            'description': 're:\d{4}第\d{2}期',
+        },
+        'playlist_mincount': 10,
+    }
+
+    def _real_extract(self, url):
+        chart_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, chart_id, note='Download chart info',
+            errnote='Unable to get chart info')
+
+        chart_name = self._html_search_regex(
+            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
+
+        chart_desc = self._html_search_regex(
+            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
+
+        entries = [
+            self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
+        ]
+        return self.playlist_result(entries, chart_id, chart_name, chart_desc)
+
+
+class KuwoSingerIE(InfoExtractor):
+    IE_NAME = 'kuwo:singer'
+    IE_DESC = '酷我音乐 - 歌手'
+    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
+        'info_dict': {
+            'id': 'bruno+mars',
+            'title': 'Bruno Mars',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
+        'info_dict': {
+            'id': 'Ali',
+            'title': 'Ali',
+        },
+        'playlist_mincount': 95,
+    }]
+
+    def _real_extract(self, url):
+        singer_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, singer_id, note='Download singer info',
+            errnote='Unable to get singer info')
+
+        singer_name = self._html_search_regex(
+            r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
+        )
+
+        entries = []
+        first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
+        for page_num in itertools.count(1):
+            webpage = self._download_webpage(
+                'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
+                singer_id, note='Download song list page #%d' % page_num,
+                errnote='Unable to get song list page #%d' % page_num)
+
+            entries.extend([
+                self.url_result(song_url, 'Kuwo') for song_url in re.findall(
+                    r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
+                    webpage)
+            ][:10 if first_page_only else None])
+
+            if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
+                break
+
+        return self.playlist_result(entries, singer_id, singer_name)
+
+
+class KuwoCategoryIE(InfoExtractor):
+    IE_NAME = 'kuwo:category'
+    IE_DESC = '酷我音乐 - 分类'
+    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
+    _TEST = {
+        'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
+        'info_dict': {
+            'id': '86375',
+            'title': '八十年代精选',
+            'description': '这些都是属于八十年代的回忆！',
+        },
+        'playlist_count': 30,
+    }
+
+    def _real_extract(self, url):
+        category_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, category_id, note='Download category info',
+            errnote='Unable to get category info')
+
+        category_name = self._html_search_regex(
+            r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')
+
+        category_desc = remove_start(
+            get_element_by_id('intro', webpage).strip(),
+            '%s简介：' % category_name)
+
+        jsonm = self._parse_json(self._html_search_regex(
+            r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
+
+        entries = [
+            self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
+            for song in jsonm['musiclist']
+        ]
+        return self.playlist_result(entries, category_id, category_name, category_desc)
+
+
+class KuwoMvIE(KuwoBaseIE):
+    IE_NAME = 'kuwo:mv'
+    IE_DESC = '酷我音乐 - MV'
+    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
+    _TEST = {
+        'url': 'http://www.kuwo.cn/mv/6480076/',
+        'info_dict': {
+            'id': '6480076',
+            'ext': 'mkv',
+            'title': '我们家MV',
+            'creator': '2PM',
+        },
+    }
+    _FORMATS = KuwoBaseIE._FORMATS + [
+        {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
+        {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
+    ]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, song_id, note='Download mv detail info: %s' % song_id,
+            errnote='Unable to get mv detail info: %s' % song_id)
+
+        mobj = re.search(
+            r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
+            webpage)
+        if mobj:
+            song_name = mobj.group('song')
+            singer_name = mobj.group('singer')
+        else:
+            raise ExtractorError('Unable to find song or singer names')
+
+        formats = self._get_formats(song_id)
+
+        return {
+            'id': song_id,
+            'title': song_name,
+            'creator': singer_name,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py

index da896caf160f6f0a0ae49167ae546cd9da4d45fa..ba2ae80853d36ce1b9a98d109c2580bcdde65d5a 100644 (file)
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -19,6 +19,7 @@ from ..utils import (
  
  
  class LetvIE(InfoExtractor):
+    IE_DESC = '乐视网'
      _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
  
      _TESTS = [{
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py

index cfd3b14f4bfd755a7600701e86900ece12b0c3ac..a00f6e5e5eb1d398ef0776d8b20dcb1dd51ec082 100644 (file)
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -30,13 +30,13 @@ class LyndaBaseIE(InfoExtractor):
              return
  
          login_form = {
-            'username': username,
-            'password': password,
+            'username': username.encode('utf-8'),
+            'password': password.encode('utf-8'),
              'remember': 'false',
              'stayPut': 'false'
          }
          request = compat_urllib_request.Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
          login_page = self._download_webpage(
              request, None, 'Logging in as %s' % username)
  
@@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor):
                      'stayPut': 'false',
                  }
                  request = compat_urllib_request.Request(
-                    self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
+                    self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
                  login_page = self._download_webpage(
                      request, None,
                      'Confirming log in and log out from another device')
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py

index 5b9b9fbcd0844897d6d63305ed00729e70c7f4fb..4557a2b13b3e47a75242ebd4a5c095bf17cbaacf 100644 (file)
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
  
          # get metadata
          metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata = self._download_xml(metadata_url, video_id)
+        metadata = self._download_xml(
+            metadata_url, video_id, transform_source=lambda s: s.strip())
  
          # extract values from metadata
          url_flv_el = metadata.find('url_flv')
diff --git a/youtube_dl/extractor/myvi.py b/youtube_dl/extractor/myvi.py

new file mode 100644 (file)

index 0000000..4c65be1
--- /dev/null
+++ b/youtube_dl/extractor/myvi.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .vimple import SprutoBaseIE
+
+
+class MyviIE(SprutoBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        myvi\.(?:ru/player|tv)/
+                            (?:
+                                (?:
+                                    embed/html|
+                                    flash|
+                                    api/Video/Get
+                                )/|
+                                content/preloader\.swf\?.*\bid=
+                            )
+                            (?P<id>[\da-zA-Z_-]+)
+                    '''
+    _TESTS = [{
+        'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+        'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
+        'info_dict': {
+            'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
+            'ext': 'mp4',
+            'title': 'хозяин жизни',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 25,
+        },
+    }, {
+        'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
+        'only_matching': True,
+    }, {
+        'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
+        'only_matching': True,
+    }, {
+        'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
+        'only_matching': True,
+    }, {
+        'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        spruto = self._download_json(
+            'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
+
+        return self._extract_spruto(spruto, video_id)
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py

new file mode 100644 (file)

index 0000000..a8e0a64
--- /dev/null
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -0,0 +1,459 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from hashlib import md5
+from base64 import b64encode
+from datetime import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+    compat_urllib_parse,
+    compat_str,
+    compat_itertools_count,
+)
+
+
+class NetEaseMusicBaseIE(InfoExtractor):
+    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
+    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
+    _API_BASE = 'http://music.163.com/api/'
+
+    @classmethod
+    def _encrypt(cls, dfsid):
+        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
+        string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
+        salt_len = len(salt_bytes)
+        for i in range(len(string_bytes)):
+            string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
+        m = md5()
+        m.update(bytes(string_bytes))
+        result = b64encode(m.digest()).decode('ascii')
+        return result.replace('/', '_').replace('+', '-')
+
+    @classmethod
+    def extract_formats(cls, info):
+        formats = []
+        for song_format in cls._FORMATS:
+            details = info.get(song_format)
+            if not details:
+                continue
+            formats.append({
+                'url': 'http://m1.music.126.net/%s/%s.%s' %
+                       (cls._encrypt(details['dfsId']), details['dfsId'],
+                        details['extension']),
+                'ext': details.get('extension'),
+                'abr': details.get('bitrate', 0) / 1000,
+                'format_id': song_format,
+                'filesize': details.get('size'),
+                'asr': details.get('sr')
+            })
+        return formats
+
+    @classmethod
+    def convert_milliseconds(cls, ms):
+        return int(round(ms / 1000.0))
+
+    def query_api(self, endpoint, video_id, note):
+        req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
+        req.add_header('Referer', self._API_BASE)
+        return self._download_json(req, video_id, note)
+
+
+class NetEaseMusicIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:song'
+    IE_DESC = '网易云音乐'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://music.163.com/#/song?id=32102397',
+        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+        'info_dict': {
+            'id': '32102397',
+            'ext': 'mp3',
+            'title': 'Bad Blood (feat. Kendrick Lamar)',
+            'creator': 'Taylor Swift / Kendrick Lamar',
+            'upload_date': '20150517',
+            'timestamp': 1431878400,
+            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
+        },
+    }, {
+        'note': 'No lyrics translation.',
+        'url': 'http://music.163.com/#/song?id=29822014',
+        'info_dict': {
+            'id': '29822014',
+            'ext': 'mp3',
+            'title': '听见下雨的声音',
+            'creator': '周杰伦',
+            'upload_date': '20141225',
+            'timestamp': 1419523200,
+            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
+        },
+    }, {
+        'note': 'No lyrics.',
+        'url': 'http://music.163.com/song?id=17241424',
+        'info_dict': {
+            'id': '17241424',
+            'ext': 'mp3',
+            'title': 'Opus 28',
+            'creator': 'Dustin O\'Halloran',
+            'upload_date': '20080211',
+            'timestamp': 1202745600,
+        },
+    }, {
+        'note': 'Has translated name.',
+        'url': 'http://music.163.com/#/song?id=22735043',
+        'info_dict': {
+            'id': '22735043',
+            'ext': 'mp3',
+            'title': '소원을 말해봐 (Genie)',
+            'creator': '少女时代',
+            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
+            'upload_date': '20100127',
+            'timestamp': 1264608000,
+            'alt_title': '说出愿望吧(Genie)',
+        }
+    }]
+
+    def _process_lyrics(self, lyrics_info):
+        original = lyrics_info.get('lrc', {}).get('lyric')
+        translated = lyrics_info.get('tlyric', {}).get('lyric')
+
+        if not translated:
+            return original
+
+        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
+        original_ts_texts = re.findall(lyrics_expr, original)
+        translation_ts_dict = dict(
+            (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
+        )
+        lyrics = '\n'.join([
+            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
+            for time_stamp, text in original_ts_texts
+        ])
+        return lyrics
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+
+        params = {
+            'id': song_id,
+            'ids': '[%s]' % song_id
+        }
+        info = self.query_api(
+            'song/detail?' + compat_urllib_parse.urlencode(params),
+            song_id, 'Downloading song info')['songs'][0]
+
+        formats = self.extract_formats(info)
+        self._sort_formats(formats)
+
+        lyrics_info = self.query_api(
+            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
+            song_id, 'Downloading lyrics data')
+        lyrics = self._process_lyrics(lyrics_info)
+
+        alt_title = None
+        if info.get('transNames'):
+            alt_title = '/'.join(info.get('transNames'))
+
+        return {
+            'id': song_id,
+            'title': info['name'],
+            'alt_title': alt_title,
+            'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
+            'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
+            'thumbnail': info.get('album', {}).get('picUrl'),
+            'duration': self.convert_milliseconds(info.get('duration', 0)),
+            'description': lyrics,
+            'formats': formats,
+        }
+
+
+class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:album'
+    IE_DESC = '网易云音乐 - 专辑'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://music.163.com/#/album?id=220780',
+        'info_dict': {
+            'id': '220780',
+            'title': 'B\'day',
+        },
+        'playlist_count': 23,
+    }
+
+    def _real_extract(self, url):
+        album_id = self._match_id(url)
+
+        info = self.query_api(
+            'album/%s?id=%s' % (album_id, album_id),
+            album_id, 'Downloading album data')['album']
+
+        name = info['name']
+        desc = info.get('description')
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['songs']
+        ]
+        return self.playlist_result(entries, album_id, name, desc)
+
+
+class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:singer'
+    IE_DESC = '网易云音乐 - 歌手'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'note': 'Singer has aliases.',
+        'url': 'http://music.163.com/#/artist?id=10559',
+        'info_dict': {
+            'id': '10559',
+            'title': '张惠妹 - aMEI;阿密特',
+        },
+        'playlist_count': 50,
+    }, {
+        'note': 'Singer has translated name.',
+        'url': 'http://music.163.com/#/artist?id=124098',
+        'info_dict': {
+            'id': '124098',
+            'title': '李昇基 - 이승기',
+        },
+        'playlist_count': 50,
+    }]
+
+    def _real_extract(self, url):
+        singer_id = self._match_id(url)
+
+        info = self.query_api(
+            'artist/%s?id=%s' % (singer_id, singer_id),
+            singer_id, 'Downloading singer data')
+
+        name = info['artist']['name']
+        if info['artist']['trans']:
+            name = '%s - %s' % (name, info['artist']['trans'])
+        if info['artist']['alias']:
+            name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
+
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['hotSongs']
+        ]
+        return self.playlist_result(entries, singer_id, name)
+
+
+class NetEaseMusicListIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:playlist'
+    IE_DESC = '网易云音乐 - 歌单'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://music.163.com/#/playlist?id=79177352',
+        'info_dict': {
+            'id': '79177352',
+            'title': 'Billboard 2007 Top 100',
+            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
+        },
+        'playlist_count': 99,
+    }, {
+        'note': 'Toplist/Charts sample',
+        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
+        'info_dict': {
+            'id': '3733003',
+            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
+            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
+        },
+        'playlist_count': 50,
+    }]
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        info = self.query_api(
+            'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
+            list_id, 'Downloading playlist data')['result']
+
+        name = info['name']
+        desc = info.get('description')
+
+        if info.get('specialType') == 10:  # is a chart/toplist
+            datestamp = datetime.fromtimestamp(
+                self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
+            name = '%s %s' % (name, datestamp)
+
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
+                            'NetEaseMusic', song['id'])
+            for song in info['tracks']
+        ]
+        return self.playlist_result(entries, list_id, name, desc)
+
+
+class NetEaseMusicMvIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:mv'
+    IE_DESC = '网易云音乐 - MV'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://music.163.com/#/mv?id=415350',
+        'info_dict': {
+            'id': '415350',
+            'ext': 'mp4',
+            'title': '이럴거면 그러지말지',
+            'description': '白雅言自作曲唱甜蜜爱情',
+            'creator': '白雅言',
+            'upload_date': '20150520',
+        },
+    }
+
+    def _real_extract(self, url):
+        mv_id = self._match_id(url)
+
+        info = self.query_api(
+            'mv/detail?id=%s&type=mp4' % mv_id,
+            mv_id, 'Downloading mv info')['data']
+
+        formats = [
+            {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
+            for brs, mv_url in info['brs'].items()
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': mv_id,
+            'title': info['name'],
+            'description': info.get('desc') or info.get('briefDesc'),
+            'creator': info['artistName'],
+            'upload_date': info['publishTime'].replace('-', ''),
+            'formats': formats,
+            'thumbnail': info.get('cover'),
+            'duration': self.convert_milliseconds(info.get('duration', 0)),
+        }
+
+
+class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:program'
+    IE_DESC = '网易云音乐 - 电台节目'
+    _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://music.163.com/#/program?id=10109055',
+        'info_dict': {
+            'id': '10109055',
+            'ext': 'mp3',
+            'title': '不丹足球背后的故事',
+            'description': '喜马拉雅人的足球梦 ...',
+            'creator': '大话西藏',
+            'timestamp': 1434179342,
+            'upload_date': '20150613',
+            'duration': 900,
+        },
+    }, {
+        'note': 'This program has accompanying songs.',
+        'url': 'http://music.163.com/#/program?id=10141022',
+        'info_dict': {
+            'id': '10141022',
+            'title': '25岁，你是自在如风的少年<27°C>',
+            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+        },
+        'playlist_count': 4,
+    }, {
+        'note': 'This program has accompanying songs.',
+        'url': 'http://music.163.com/#/program?id=10141022',
+        'info_dict': {
+            'id': '10141022',
+            'ext': 'mp3',
+            'title': '25岁，你是自在如风的少年<27°C>',
+            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
+            'timestamp': 1434450841,
+            'upload_date': '20150616',
+        },
+        'params': {
+            'noplaylist': True
+        }
+    }]
+
+    def _real_extract(self, url):
+        program_id = self._match_id(url)
+
+        info = self.query_api(
+            'dj/program/detail?id=%s' % program_id,
+            program_id, 'Downloading program info')['program']
+
+        name = info['name']
+        description = info['description']
+
+        if not info['songs'] or self._downloader.params.get('noplaylist'):
+            if info['songs']:
+                self.to_screen(
+                    'Downloading just the main audio %s because of --no-playlist'
+                    % info['mainSong']['id'])
+
+            formats = self.extract_formats(info['mainSong'])
+            self._sort_formats(formats)
+
+            return {
+                'id': program_id,
+                'title': name,
+                'description': description,
+                'creator': info['dj']['brand'],
+                'timestamp': self.convert_milliseconds(info['createTime']),
+                'thumbnail': info['coverUrl'],
+                'duration': self.convert_milliseconds(info.get('duration', 0)),
+                'formats': formats,
+            }
+
+        self.to_screen(
+            'Downloading playlist %s - add --no-playlist to just download the main audio %s'
+            % (program_id, info['mainSong']['id']))
+
+        song_ids = [info['mainSong']['id']]
+        song_ids.extend([song['id'] for song in info['songs']])
+        entries = [
+            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
+                            'NetEaseMusic', song_id)
+            for song_id in song_ids
+        ]
+        return self.playlist_result(entries, program_id, name, description)
+
+
+class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
+    IE_NAME = 'netease:djradio'
+    IE_DESC = '网易云音乐 - 电台'
+    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://music.163.com/#/djradio?id=42',
+        'info_dict': {
+            'id': '42',
+            'title': '声音蔓延',
+            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
+        },
+        'playlist_mincount': 40,
+    }
+    _PAGE_SIZE = 1000
+
+    def _real_extract(self, url):
+        dj_id = self._match_id(url)
+
+        name = None
+        desc = None
+        entries = []
+        for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+            info = self.query_api(
+                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
+                % (self._PAGE_SIZE, dj_id, offset),
+                dj_id, 'Downloading dj programs - %d' % offset)
+
+            entries.extend([
+                self.url_result(
+                    'http://music.163.com/#/program?id=%s' % program['id'],
+                    'NetEaseMusicProgram', program['id'])
+                for program in info['programs']
+            ])
+
+            if name is None:
+                radio = info['programs'][0]['radio']
+                name = radio['name']
+                desc = radio['desc']
+
+            if not info['more']:
+                break
+
+        return self.playlist_result(entries, dj_id, name, desc)
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py

index 85fcad06b51dc9ce87bdd563043c92a126cc8eea..5a9e73cd66a1b1224bdec848722f5e9d14f65c38 100644 (file)
--- a/youtube_dl/extractor/newstube.py
+++ b/youtube_dl/extractor/newstube.py
@@ -31,7 +31,7 @@ class NewstubeIE(InfoExtractor):
          page = self._download_webpage(url, video_id, 'Downloading page')
  
          video_guid = self._html_search_regex(
-            r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+            r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
              page, 'video GUID')
  
          player = self._download_xml(
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py

index d1b7cff4cfbf30c76c52ae98ad247e8907be6abd..c10784f6b7321395e69c86ab06f91f8d3b37b655 100644 (file)
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601
  
  
  class NextMediaIE(InfoExtractor):
+    IE_DESC = '蘋果日報'
      _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
      _TESTS = [{
          'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
@@ -66,6 +67,7 @@ class NextMediaIE(InfoExtractor):
  
  
  class NextMediaActionNewsIE(NextMediaIE):
+    IE_DESC = '蘋果日報 - 動新聞'
      _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
      _TESTS = [{
          'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
@@ -90,6 +92,7 @@ class NextMediaActionNewsIE(NextMediaIE):
  
  
  class AppleDailyIE(NextMediaIE):
+    IE_DESC = '臺灣蘋果日報'
      _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
      _TESTS = [{
          'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py

index 5bbd2dcf66294f5f0e21b6aae000f9ddecd5c051..a53e27b274eaa21ac15a1dc5077001d520832696 100644 (file)
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -195,7 +195,7 @@ class NocoIE(InfoExtractor):
          if episode_number:
              title += ' #' + compat_str(episode_number)
          if episode:
-            title += ' - ' + episode
+            title += ' - ' + compat_str(episode)
  
          description = show.get('show_resume') or show.get('family_resume')
  
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py

index 173e46cd8b78dfa0dcdf33b6d3c280bf40a70681..0b5ff47600559e50a2282b184b23d3fc7263d46d 100644 (file)
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor):
          station = mobj.group('station')
  
          info = self._download_json(
-            'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+            'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
              display_id)
  
          video_id = compat_str(info['id'])
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py

index 5d84485714b9f360d47c8676710e9c3e6d9578c7..0c2d02c108ed425cf4688c34aa2a826ddaa400b4 100644 (file)
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,5 +1,7 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..utils import (
      fix_xml_ampersands,
@@ -7,7 +9,6 @@ from ..utils import (
      qualities,
      strip_jsonp,
      unified_strdate,
-    url_basename,
  )
  
  
@@ -16,13 +17,42 @@ class NPOBaseIE(InfoExtractor):
          token_page = self._download_webpage(
              'http://ida.omroep.nl/npoplayer/i.js',
              video_id, note='Downloading token')
-        return self._search_regex(
+        token = self._search_regex(
              r'npoplayer\.token = "(.+?)"', token_page, 'token')
+        # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js
+        token_l = list(token)
+        first = second = None
+        for i in range(5, len(token_l) - 4):
+            if token_l[i].isdigit():
+                if first is None:
+                    first = i
+                elif second is None:
+                    second = i
+        if first is None or second is None:
+            first = 12
+            second = 13
+
+        token_l[first], token_l[second] = token_l[second], token_l[first]
+
+        return ''.join(token_l)
  
  
  class NPOIE(NPOBaseIE):
-    IE_NAME = 'npo.nl'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
+    IE_NAME = 'npo'
+    IE_DESC = 'npo.nl and ntr.nl'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        npo:|
+                        https?://
+                            (?:www\.)?
+                            (?:
+                                npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+                                ntr\.nl/(?:[^/]+/){2,}|
+                                omroepwnl\.nl/video/fragment/[^/]+__
+                            )
+                        )
+                        (?P<id>[^/?#]+)
+                '''
  
      _TESTS = [
          {
@@ -42,7 +72,7 @@ class NPOIE(NPOBaseIE):
              'info_dict': {
                  'id': 'VARA_101191800',
                  'ext': 'm4v',
-                'title': 'De Mega Mike & Mega Thomas show',
+                'title': 'De Mega Mike & Mega Thomas show: The best of.',
                  'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
                  'upload_date': '20090227',
                  'duration': 2400,
@@ -54,8 +84,8 @@ class NPOIE(NPOBaseIE):
              'info_dict': {
                  'id': 'VPWON_1169289',
                  'ext': 'm4v',
-                'title': 'Tegenlicht',
-                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'title': 'Tegenlicht: De toekomst komt uit Afrika',
+                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                  'upload_date': '20130225',
                  'duration': 3000,
              },
@@ -84,6 +114,30 @@ class NPOIE(NPOBaseIE):
                  'title': 'Hoe gaat Europa verder na Parijs?',
              },
          },
+        {
+            'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+            'md5': '01c6a2841675995da1f0cf776f03a9c3',
+            'info_dict': {
+                'id': 'VPWON_1233944',
+                'ext': 'm4v',
+                'title': 'Aap, poot, pies',
+                'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+                'upload_date': '20150508',
+                'duration': 599,
+            },
+        },
+        {
+            'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+            'md5': 'd30cd8417b8b9bca1fdff27428860d08',
+            'info_dict': {
+                'id': 'POW_00996502',
+                'ext': 'm4v',
+                'title': '''"Dit is wel een 'landslide'..."''',
+                'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+                'upload_date': '20150508',
+                'duration': 462,
+            },
+        }
      ]
  
      def _real_extract(self, url):
@@ -92,12 +146,24 @@ class NPOIE(NPOBaseIE):
  
      def _get_info(self, video_id):
          metadata = self._download_json(
-            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
+            'http://e.omroep.nl/metadata/%s' % video_id,
              video_id,
              # We have to remove the javascript callback
              transform_source=strip_jsonp,
          )
  
+        # For some videos actual video id (prid) is different (e.g. for
+        # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+        # video id is POMS_WNL_853698 but prid is POW_00996502)
+        video_id = metadata.get('prid') or video_id
+
+        # titel is too generic in some cases so utilize aflevering_titel as well
+        # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+        title = metadata['titel']
+        sub_title = metadata.get('aflevering_titel')
+        if sub_title and sub_title != title:
+            title += ': %s' % sub_title
+
          token = self._get_token(video_id)
  
          formats = []
@@ -170,8 +236,8 @@ class NPOIE(NPOBaseIE):
  
          return {
              'id': video_id,
-            'title': metadata['titel'],
-            'description': metadata['info'],
+            'title': title,
+            'description': metadata.get('info'),
              'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
              'upload_date': unified_strdate(metadata.get('gidsdatum')),
              'duration': parse_duration(metadata.get('tijdsduur')),
@@ -340,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor):
          }
  
  
-class TegenlichtVproIE(NPOIE):
-    IE_NAME = 'tegenlicht.vpro.nl'
-    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+class VPROIE(NPOIE):
+    _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
  
      _TESTS = [
          {
@@ -351,17 +416,72 @@ class TegenlichtVproIE(NPOIE):
              'info_dict': {
                  'id': 'VPWON_1169289',
                  'ext': 'm4v',
-                'title': 'Tegenlicht',
-                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'title': 'De toekomst komt uit Afrika',
+                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                  'upload_date': '20130225',
              },
          },
+        {
+            'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
+            'info_dict': {
+                'id': 'sergio-herman',
+                'title': 'Sergio Herman: Fucking perfect',
+            },
+            'playlist_count': 2,
+        },
+        {
+            # playlist with youtube embed
+            'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
+            'info_dict': {
+                'id': 'education-education',
+                'title': '2Doc',
+            },
+            'playlist_count': 2,
+        }
      ]
  
      def _real_extract(self, url):
-        name = url_basename(url)
-        webpage = self._download_webpage(url, name)
-        urn = self._html_search_meta('mediaurn', webpage)
-        info_page = self._download_json(
-            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
-        return self._get_info(info_page['mid'])
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+            for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
+        ]
+
+        playlist_title = self._search_regex(
+            r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
+            webpage, 'playlist title', default=None) or self._og_search_title(webpage)
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class WNLIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+
+    _TEST = {
+        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+        'info_dict': {
+            'id': 'vandaag-de-dag-6-mei',
+            'title': 'Vandaag de Dag 6 mei',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id, 'NPO')
+            for video_id, part in re.findall(
+                r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
+        ]
+
+        playlist_title = self._html_search_regex(
+            r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
+            webpage, 'playlist title')
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py

index cc70c295014f95fcb7e74f2f009889b5ca135663..d066a96db137ee3fb2c36f712803622e73b4aa40 100644 (file)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -13,7 +13,7 @@ from ..utils import (
  
  
  class NRKIE(InfoExtractor):
-    _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
  
      _TESTS = [
          {
@@ -76,7 +76,7 @@ class NRKIE(InfoExtractor):
  
  
  class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
  
      _TESTS = [{
          'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
@@ -116,11 +116,12 @@ class NRKPlaylistIE(InfoExtractor):
  
  
  class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+    IE_DESC = 'NRK TV and NRK Radio'
+    _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
  
      _TESTS = [
          {
-            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+            'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
              'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
              'info_dict': {
                  'id': 'MUHH48000314',
@@ -132,7 +133,7 @@ class NRKTVIE(InfoExtractor):
              },
          },
          {
-            'url': 'http://tv.nrk.no/program/mdfp15000514',
+            'url': 'https://tv.nrk.no/program/mdfp15000514',
              'md5': '383650ece2b25ecec996ad7b5bb2a384',
              'info_dict': {
                  'id': 'mdfp15000514',
@@ -145,7 +146,7 @@ class NRKTVIE(InfoExtractor):
          },
          {
              # single playlist video
-            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
              'md5': 'adbd1dbd813edaf532b0a253780719c2',
              'info_dict': {
                  'id': 'MSPO40010515-part2',
@@ -157,7 +158,7 @@ class NRKTVIE(InfoExtractor):
              'skip': 'Only works from Norway',
          },
          {
-            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
              'playlist': [
                  {
                      'md5': '9480285eff92d64f06e02a5367970a7a',
@@ -188,6 +189,10 @@ class NRKTVIE(InfoExtractor):
                  'duration': 6947.5199999999995,
              },
              'skip': 'Only works from Norway',
+        },
+        {
+            'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+            'only_matching': True,
          }
      ]
  
@@ -206,7 +211,8 @@ class NRKTVIE(InfoExtractor):
          ]}
  
      def _extract_f4m(self, manifest_url, video_id):
-        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+        return self._extract_f4m_formats(
+            manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -268,7 +274,7 @@ class NRKTVIE(InfoExtractor):
  
          m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
          if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
+            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
          self._sort_formats(formats)
  
          subtitles_url = self._html_search_regex(
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py

new file mode 100644 (file)

index 0000000..0f1f448
--- /dev/null
+++ b/youtube_dl/extractor/onionstudios.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class OnionStudiosIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+
+    _TESTS = [{
+        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
+        'md5': 'd4851405d31adfadf71cd7a487b765bb',
+        'info_dict': {
+            'id': '2937',
+            'ext': 'mp4',
+            'title': 'Hannibal charges forward, stops for a cocktail',
+            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'The A.V. Club',
+            'uploader_id': 'TheAVClub',
+        },
+    }, {
+        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)
+
+        formats = []
+        for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
+            if determine_ext(src) != 'm3u8':  # m3u8 always results in 403
+                formats.append({
+                    'url': src,
+                })
+        self._sort_formats(formats)
+
+        title = self._search_regex(
+            r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
+            webpage, 'title', group='title')
+        description = self._search_regex(
+            r'share_description\s*=\s*(["\'])(?P<description>[^\1]+?)\1',
+            webpage, 'description', default=None, group='description')
+        thumbnail = self._search_regex(
+            r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
+            webpage, 'thumbnail', default=False, group='thumbnail')
+
+        uploader_id = self._search_regex(
+            r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
+            webpage, 'uploader id', fatal=False, group='uploader_id')
+        uploader = self._search_regex(
+            r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
+            webpage, 'uploader', default=False, group='uploader')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py

index 143a7669639770e0cdfddc55e0b9395893301736..fec5d65ad94892ca0f40a9e49703c857d98b47a4 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -1,3 +1,4 @@
+# coding: utf-8
  from __future__ import unicode_literals
  
  import re
@@ -35,6 +36,9 @@ class PBSIE(InfoExtractor):
                  'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
                  'duration': 3190,
              },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
          },
          {
              'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
@@ -46,6 +50,9 @@ class PBSIE(InfoExtractor):
                  'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
                  'duration': 5050,
              },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            }
          },
          {
              'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@@ -68,7 +75,10 @@ class PBSIE(InfoExtractor):
                  'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
                  'duration': 6559,
                  'thumbnail': 're:^https?://.*\.jpg$',
-            }
+            },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
          },
          {
              'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@@ -82,7 +92,10 @@ class PBSIE(InfoExtractor):
                  'duration': 3172,
                  'thumbnail': 're:^https?://.*\.jpg$',
                  'upload_date': '20140122',
-            }
+            },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
          },
          {
              'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@@ -90,6 +103,21 @@ class PBSIE(InfoExtractor):
                  'id': 'united-states-of-secrets',
              },
              'playlist_count': 2,
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
+            'info_dict': {
+                'id': '2280706814',
+                'display_id': 'player',
+                'ext': 'mp4',
+                'title': 'Death and the Civil War',
+                'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
+                'duration': 6705,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
          }
      ]
  
@@ -123,7 +151,7 @@ class PBSIE(InfoExtractor):
                  return media_id, presumptive_id, upload_date
  
              url = self._search_regex(
-                r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+                r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
                  webpage, 'player URL')
              mobj = re.match(self._VALID_URL, url)
  
@@ -196,6 +224,14 @@ class PBSIE(InfoExtractor):
              rating_str = rating_str.rpartition('-')[2]
          age_limit = US_RATINGS.get(rating_str)
  
+        subtitles = {}
+        closed_captions_url = info.get('closed_captions_url')
+        if closed_captions_url:
+            subtitles['en'] = [{
+                'ext': 'ttml',
+                'url': closed_captions_url,
+            }]
+
          return {
              'id': video_id,
              'display_id': display_id,
@@ -206,4 +242,5 @@ class PBSIE(InfoExtractor):
              'age_limit': age_limit,
              'upload_date': upload_date,
              'formats': formats,
+            'subtitles': subtitles,
          }
diff --git a/youtube_dl/extractor/planetaplay.py b/youtube_dl/extractor/planetaplay.py

index 596c621d75067255f5be6e4eaf97ba897cc51fe5..06505e96fb9ac81224f03f71299eb80238c0921f 100644 (file)
--- a/youtube_dl/extractor/planetaplay.py
+++ b/youtube_dl/extractor/planetaplay.py
@@ -18,7 +18,8 @@ class PlanetaPlayIE(InfoExtractor):
              'id': '3586',
              'ext': 'flv',
              'title': 'md5:e829428ee28b1deed00de90de49d1da1',
-        }
+        },
+        'skip': 'Not accessible from Travis CI server',
      }
  
      _SONG_FORMATS = {
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py

index 45716c75d9505c5fcb7e8c6d73ec4feaef298aee..8a1c296dda8b57611a0e464387be43ab0fc9a370 100644 (file)
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -38,9 +38,7 @@ class PlayedIE(InfoExtractor):
          if m_error:
              raise ExtractorError(m_error.group('msg'), expected=True)
  
-        fields = re.findall(
-            r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
-        data = dict(fields)
+        data = self._hidden_inputs(orig_webpage)
  
          self._sleep(2, video_id)
  
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py

index 01cc3d9ea3ff845476a7f7b306c3bfee25078b96..304359dc5b189b8ce27c967c2d369b26db334532 100644 (file)
--- a/youtube_dl/extractor/primesharetv.py
+++ b/youtube_dl/extractor/primesharetv.py
@@ -1,7 +1,5 @@
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
@@ -31,12 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
          if '>File not exist<' in webpage:
              raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            (?:id="[^"]+"\s+)?
-            value="([^"]*)"
-            ''', webpage))
+        fields = self._hidden_inputs(webpage)
  
          headers = {
              'Referer': url,
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py

index f536e6e6cdfb3d71e21c98614e2baf117387493b..8190ed6766ce5c878fc82700524ec6d012d70a57 100644 (file)
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -35,10 +35,7 @@ class PromptFileIE(InfoExtractor):
              raise ExtractorError('Video %s does not exist' % video_id,
                                   expected=True)
  
-        fields = dict(re.findall(r'''(?x)type="hidden"\s+
-            name="(.+?)"\s+
-            value="(.*?)"
-            ''', webpage))
+        fields = self._hidden_inputs(webpage)
          post = compat_urllib_parse.urlencode(fields)
          req = compat_urllib_request.Request(url, post)
          req.add_header('Content-type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py

index 536a42dc88a4e17bbd039289508521d1ea13e282..fec008ce7687a0360d2e54e3130425245f30b4fc 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -9,8 +9,9 @@ from ..compat import (
      compat_urllib_parse,
  )
  from ..utils import (
-    unified_strdate,
+    determine_ext,
      int_or_none,
+    unified_strdate,
  )
  
  
@@ -21,6 +22,11 @@ class ProSiebenSat1IE(InfoExtractor):
  
      _TESTS = [
          {
+            # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
+            # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+            # - malformed f4m manifest support
+            # - proper handling of URLs starting with `https?://` in 2.0 manifests
+            # - recursive child f4m manifests extraction
              'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
              'info_dict': {
                  'id': '2104602',
@@ -208,7 +214,7 @@ class ProSiebenSat1IE(InfoExtractor):
          clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
  
          access_token = 'prosieben'
-        client_name = 'kolibri-1.12.6'
+        client_name = 'kolibri-2.0.19-splec4'
          client_location = url
  
          videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
@@ -275,8 +281,9 @@ class ProSiebenSat1IE(InfoExtractor):
  
          for source in urls_sources:
              protocol = source['protocol']
+            source_url = source['url']
              if protocol == 'rtmp' or protocol == 'rtmpe':
-                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
+                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
                  if not mobj:
                      continue
                  path = mobj.group('path')
@@ -293,9 +300,11 @@ class ProSiebenSat1IE(InfoExtractor):
                      'ext': 'mp4',
                      'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
                  })
+            elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+                formats.extend(self._extract_f4m_formats(source_url, clip_id))
              else:
                  formats.append({
-                    'url': source['url'],
+                    'url': source_url,
                      'vbr': fix_bitrate(source['bitrate']),
                  })
  
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py

index bafa81c21c12f15ac8162f8303f56615124b260b..1654a641f00db6833056571394b4dbe0b2856150 100644 (file)
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -9,12 +9,14 @@ from .common import InfoExtractor
  from ..utils import (
      strip_jsonp,
      unescapeHTML,
+    clean_html,
  )
  from ..compat import compat_urllib_request
  
  
  class QQMusicIE(InfoExtractor):
      IE_NAME = 'qqmusic'
+    IE_DESC = 'QQ音乐'
      _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
      _TESTS = [{
          'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
@@ -26,6 +28,20 @@ class QQMusicIE(InfoExtractor):
              'upload_date': '20141227',
              'creator': '林俊杰',
              'description': 'md5:d327722d0361576fde558f1ac68a7065',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'note': 'There is no mp3-320 version of this song.',
+        'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
+        'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
+        'info_dict': {
+            'id': '004MsGEo3DdNxV',
+            'ext': 'mp3',
+            'title': '如果',
+            'upload_date': '20050626',
+            'creator': '李季美',
+            'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
+            'thumbnail': 're:^https?://.*\.jpg$',
          }
      }]
  
@@ -68,6 +84,14 @@ class QQMusicIE(InfoExtractor):
          if lrc_content:
              lrc_content = lrc_content.replace('\\n', '\n')
  
+        thumbnail_url = None
+        albummid = self._search_regex(
+            [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
+            detail_info_page, 'album mid', default=None)
+        if albummid:
+            thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
+                            % (albummid[-2:-1], albummid[-1], albummid)
+
          guid = self.m_r_get_ruin()
  
          vkey = self._download_json(
@@ -85,6 +109,7 @@ class QQMusicIE(InfoExtractor):
                  'preference': details['preference'],
                  'abr': details.get('abr'),
              })
+        self._check_formats(formats, mid)
          self._sort_formats(formats)
  
          return {
@@ -94,6 +119,7 @@ class QQMusicIE(InfoExtractor):
              'upload_date': publish_time,
              'creator': singer,
              'description': lrc_content,
+            'thumbnail': thumbnail_url,
          }
  
  
@@ -117,6 +143,7 @@ class QQPlaylistBaseIE(InfoExtractor):
  
  class QQMusicSingerIE(QQPlaylistBaseIE):
      IE_NAME = 'qqmusic:singer'
+    IE_DESC = 'QQ音乐 - 歌手'
      _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
      _TEST = {
          'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
@@ -161,39 +188,50 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
  
  class QQMusicAlbumIE(QQPlaylistBaseIE):
      IE_NAME = 'qqmusic:album'
+    IE_DESC = 'QQ音乐 - 专辑'
      _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
  
-    _TEST = {
-        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
          'info_dict': {
              'id': '000gXCTb2AhRR1',
              'title': '我们都是这样长大的',
-            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+            'description': 'md5:179c5dce203a5931970d306aa9607ea6',
          },
          'playlist_count': 4,
-    }
+    }, {
+        'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
+        'info_dict': {
+            'id': '002Y5a3b3AlCu3',
+            'title': '그리고...',
+            'description': 'md5:a48823755615508a95080e81b51ba729',
+        },
+        'playlist_count': 8,
+    }]
  
      def _real_extract(self, url):
          mid = self._match_id(url)
  
-        album_page = self._download_webpage(
-            self.qq_static_url('album', mid), mid, 'Download album page')
+        album = self._download_json(
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+            mid, 'Download album page')['data']
  
-        entries = self.get_entries_from_page(album_page)
-
-        album_name = self._html_search_regex(
-            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
-            default=None)
-
-        album_detail = self._html_search_regex(
-            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
-            album_page, 'album details', default=None)
+        entries = [
+            self.url_result(
+                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+            ) for song in album['list']
+        ]
+        album_name = album.get('name')
+        album_detail = album.get('desc')
+        if album_detail is not None:
+            album_detail = album_detail.strip()
  
          return self.playlist_result(entries, mid, album_name, album_detail)
  
  
  class QQMusicToplistIE(QQPlaylistBaseIE):
      IE_NAME = 'qqmusic:toplist'
+    IE_DESC = 'QQ音乐 - 排行榜'
      _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
  
      _TESTS = [{
@@ -243,3 +281,37 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
          list_name = topinfo.get('ListName')
          list_description = topinfo.get('info')
          return self.playlist_result(entries, list_id, list_name, list_description)
+
+
+class QQMusicPlaylistIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:playlist'
+    IE_DESC = 'QQ音乐 - 歌单'
+    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=taoge&id=3462654915',
+        'info_dict': {
+            'id': '3462654915',
+            'title': '韩国5月新歌精选下旬',
+            'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
+        },
+        'playlist_count': 40,
+    }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        list_json = self._download_json(
+            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
+            % list_id, list_id, 'Download list page',
+            transform_source=strip_jsonp)['cdlist'][0]
+
+        entries = [
+            self.url_result(
+                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+            ) for song in list_json['songlist']
+        ]
+
+        list_name = list_json.get('dissname')
+        list_description = clean_html(unescapeHTML(list_json.get('desc')))
+        return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py

index af7d76cf47e575277de3ffe480fdb8e1eb48b43c..f414e2384e619e8faccab280bdb9e8a9518819c9 100644 (file)
--- a/youtube_dl/extractor/quickvid.py
+++ b/youtube_dl/extractor/quickvid.py
@@ -24,6 +24,7 @@ class QuickVidIE(InfoExtractor):
              'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
              'view_count': int,
          },
+        'skip': 'Not accessible from Travis CI server',
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py

new file mode 100644 (file)

index 0000000..796adfd
--- /dev/null
+++ b/youtube_dl/extractor/rds.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class RDSIE(InfoExtractor):
+    IE_DESC = 'RDS.ca'
+    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
+        'info_dict': {
+            'id': '3.1132799',
+            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
+            'ext': 'mp4',
+            'title': 'Fowler Jr. prend la direction de Jacksonville',
+            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
+            'timestamp': 1430397346,
+            'upload_date': '20150430',
+            'duration': 154.354,
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        # TODO: extract f4m from 9c9media.com
+        video_url = self._search_regex(
+            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
+            webpage, 'video url')
+
+        title = self._og_search_title(webpage) or self._html_search_meta(
+            'title', webpage, 'title', fatal=True)
+        description = self._og_search_description(webpage) or self._html_search_meta(
+            'description', webpage, 'description')
+        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
+             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
+            webpage, 'thumbnail', fatal=False)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
+            webpage, 'upload date', fatal=False))
+        duration = parse_duration(self._search_regex(
+            r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
+            webpage, 'duration', fatal=False))
+        age_limit = self._family_friendly_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py

index 41d202c289839d00de98eb0a3b1e5770eb0990f5..a4d3d73ff78a4bd1ac59866b3159c804844b77ae 100644 (file)
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -43,6 +43,10 @@ class RtlNlIE(InfoExtractor):
              'upload_date': '20150215',
              'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
          }
+    }, {
+        # encrypted m3u8 streams, georestricted
+        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
+        'only_matching': True,
      }, {
          'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
          'only_matching': True,
@@ -51,7 +55,7 @@ class RtlNlIE(InfoExtractor):
      def _real_extract(self, url):
          uuid = self._match_id(url)
          info = self._download_json(
-            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
              uuid)
  
          material = info['material'][0]
@@ -59,9 +63,14 @@ class RtlNlIE(InfoExtractor):
          subtitle = material['title'] or info['episodes'][0]['name']
          description = material.get('synopsis') or info['episodes'][0]['synopsis']
  
+        meta = info.get('meta', {})
+
          # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
-        videopath = material['videopath'].replace('.f4m', '.m3u8')
-        m3u8_url = 'http://manifest.us.rtl.nl' + videopath
+        # NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
+        # this adaptive -> flash workaround is not required in general, but it also
+        # allows bypassing georestriction therefore is retained for now.
+        videopath = material['videopath'].replace('/adaptive/', '/flash/')
+        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
  
          formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
  
@@ -82,7 +91,7 @@ class RtlNlIE(InfoExtractor):
          self._sort_formats(formats)
  
          thumbnails = []
-        meta = info.get('meta', {})
+
          for p in ('poster_base_url', '"thumb_base_url"'):
              if not meta.get(p):
                  continue
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py

index 9f3e944e73532a92c5213ce95e7633fe6bc4c212..a07677686a4ecc2923b310c3aeeeaab610bb0868 100644 (file)
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -1,6 +1,5 @@
  from __future__ import unicode_literals
  
-import re
  import base64
  
  from .common import InfoExtractor
@@ -35,8 +34,7 @@ class SharedIE(InfoExtractor):
              raise ExtractorError(
                  'Video %s does not exist' % video_id, expected=True)
  
-        download_form = dict(re.findall(
-            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
+        download_form = self._hidden_inputs(webpage)
          request = compat_urllib_request.Request(
              url, compat_urllib_parse.urlencode(download_form))
          request.add_header('Content-Type', 'application/x-www-form-urlencoded')
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py

index 24746a09a0c2183e8a0bd8e239cb59291b41f19a..93a7cfe15cc764bc61b912dd2e3283d950790565 100644 (file)
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -53,7 +53,7 @@ class SmotriIE(InfoExtractor):
                  'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
              },
          },
-        # video-password
+        # video-password, not approved by moderator
          {
              'url': 'http://smotri.com/video/view/?id=v1390466a13c',
              'md5': 'f6331cef33cad65a0815ee482a54440b',
@@ -71,7 +71,24 @@ class SmotriIE(InfoExtractor):
              },
              'skip': 'Video is not approved by moderator',
          },
-        # age limit + video-password
+        # video-password
+        {
+            'url': 'http://smotri.com/video/view/?id=v6984858774#',
+            'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
+            'info_dict': {
+                'id': 'v6984858774',
+                'ext': 'mp4',
+                'title': 'Дача Солженицина ПАРОЛЬ 223322',
+                'uploader': 'psavari1',
+                'uploader_id': 'psavari1',
+                'upload_date': '20081103',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'videopassword': '223322',
+            },
+        },
+        # age limit + video-password, not approved by moderator
          {
              'url': 'http://smotri.com/video/view/?id=v15408898bcf',
              'md5': '91e909c9f0521adf5ee86fbe073aad70',
@@ -90,19 +107,22 @@ class SmotriIE(InfoExtractor):
              },
              'skip': 'Video is not approved by moderator',
          },
-        # not approved by moderator, but available
+        # age limit + video-password
          {
-            'url': 'http://smotri.com/video/view/?id=v28888533b73',
-            'md5': 'f44bc7adac90af518ef1ecf04893bb34',
+            'url': 'http://smotri.com/video/view/?id=v7780025814',
+            'md5': 'b4599b068422559374a59300c5337d72',
              'info_dict': {
-                'id': 'v28888533b73',
+                'id': 'v7780025814',
                  'ext': 'mp4',
-                'title': 'Russian Spies Killed By ISIL Child Soldier',
-                'uploader': 'Mopeder',
-                'uploader_id': 'mopeder',
-                'duration': 71,
-                'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
-                'upload_date': '20150114',
+                'title': 'Sexy Beach (пароль 123)',
+                'uploader': 'вАся',
+                'uploader_id': 'asya_prosto',
+                'upload_date': '20081218',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'age_limit': 18,
+            },
+            'params': {
+                'videopassword': '123'
              },
          },
          # swf player
@@ -152,6 +172,10 @@ class SmotriIE(InfoExtractor):
              'getvideoinfo': '1',
          }
  
+        video_password = self._downloader.params.get('videopassword', None)
+        if video_password:
+            video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
+
          request = compat_urllib_request.Request(
              'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
          request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -161,13 +185,18 @@ class SmotriIE(InfoExtractor):
          video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
  
          if not video_url:
-            if video.get('_moderate_no') or not video.get('moderated'):
+            if video.get('_moderate_no'):
                  raise ExtractorError(
                      'Video %s has not been approved by moderator' % video_id, expected=True)
  
              if video.get('error'):
                  raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  
+            if video.get('_pass_protected') == 1:
+                msg = ('Invalid video password' if video_password
+                       else 'This video is protected by a password, use the --video-password option')
+                raise ExtractorError(msg, expected=True)
+
          title = video['title']
          thumbnail = video['_imgURL']
          upload_date = unified_strdate(video['added'])
diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py

new file mode 100644 (file)

index 0000000..cf495f3
--- /dev/null
+++ b/youtube_dl/extractor/snagfilms.py
@@ -0,0 +1,171 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    parse_duration,
+)
+
+
+class SnagFilmsEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
+    _TESTS = [{
+        'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
+        'md5': '2924e9215c6eff7a55ed35b72276bd93',
+        'info_dict': {
+            'id': '74849a00-85a9-11e1-9660-123139220831',
+            'ext': 'mp4',
+            'title': '#whilewewatch',
+        }
+    }, {
+        'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        if '>This film is not playable in your area.<' in webpage:
+            raise ExtractorError(
+                'Film %s is not playable in your area.' % video_id, expected=True)
+
+        formats = []
+        for source in self._parse_json(js_to_json(self._search_regex(
+                r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
+            file_ = source.get('file')
+            if not file_:
+                continue
+            type_ = source.get('type')
+            format_id = source.get('label')
+            ext = determine_ext(file_)
+            if any(_ == 'm3u8' for _ in (type_, ext)):
+                formats.extend(self._extract_m3u8_formats(
+                    file_, video_id, 'mp4', m3u8_id='hls'))
+            else:
+                bitrate = int_or_none(self._search_regex(
+                    r'(\d+)kbps', file_, 'bitrate', default=None))
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]$', format_id, 'height', default=None))
+                formats.append({
+                    'url': file_,
+                    'format_id': format_id,
+                    'tbr': bitrate,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        title = self._search_regex(
+            [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
+            webpage, 'title')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }
+
+
+class SnagFilmsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
+    _TESTS = [{
+        'url': 'http://www.snagfilms.com/films/title/lost_for_life',
+        'md5': '19844f897b35af219773fd63bdec2942',
+        'info_dict': {
+            'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
+            'display_id': 'lost_for_life',
+            'ext': 'mp4',
+            'title': 'Lost for Life',
+            'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 4489,
+            'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
+        }
+    }, {
+        'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+        'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+        'info_dict': {
+            'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+            'display_id': 'the_world_cut_project/india',
+            'ext': 'mp4',
+            'title': 'India',
+            'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 979,
+            'categories': ['Documentary', 'Sports', 'Politics']
+        }
+    }, {
+        # Film is not playable in your area.
+        'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+        'only_matching': True,
+    }, {
+        # Film is not available.
+        'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        if ">Sorry, the Film you're looking for is not available.<" in webpage:
+            raise ExtractorError(
+                'Film %s is not available.' % display_id, expected=True)
+
+        film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
+
+        snag = self._parse_json(
+            self._search_regex(
+                'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
+            display_id)
+
+        for item in snag:
+            if item.get('data', {}).get('film', {}).get('id') == film_id:
+                data = item['data']['film']
+                title = data['title']
+                description = clean_html(data.get('synopsis'))
+                thumbnail = data.get('image')
+                duration = int_or_none(data.get('duration') or data.get('runtime'))
+                categories = [
+                    category['title'] for category in data.get('categories', [])
+                    if category.get('title')]
+                break
+        else:
+            title = self._search_regex(
+                r'itemprop="title">([^<]+)<', webpage, 'title')
+            description = self._html_search_regex(
+                r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
+                webpage, 'description', default=None) or self._og_search_description(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
+            duration = parse_duration(self._search_regex(
+                r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
+                webpage, 'duration', fatal=False))
+            categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
+            'id': film_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'categories': categories,
+        }
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index c23c5ee0fb853f86e4662ad574b587610456159c..118ca483265cfda0a4419d93e8ce2fd174148be4 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
      _VALID_URL = r'''(?x)^(?:https?://)?
                      (?:(?:(?:www\.|m\.)?soundcloud\.com/
                              (?P<uploader>[\w\d-]+)/
-                            (?!sets/|likes/?(?:$|[?#]))
+                            (?!sets/|(?:likes|tracks)/?(?:$|[?#]))
                              (?P<title>[\w\d-]+)/?
                              (?P<token>[^?]+?)?(?:[?].*)?$)
                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -307,6 +307,9 @@ class SoundcloudUserIE(SoundcloudIE):
              'title': 'The Royal Concept',
          },
          'playlist_mincount': 1,
+    }, {
+        'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py

index 08a5c4314959409cd93d71fcd8f8160ee541b7c4..27f4033c547a9700db6af520c4fe4a957e3755c0 100644 (file)
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -77,11 +77,13 @@ class SpiegeltvIE(InfoExtractor):
                      'rtmp_live': True,
                  })
              elif determine_ext(endpoint) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_formats = self._extract_m3u8_formats(
                      endpoint.replace('[video]', play_path),
                      video_id, 'm4v',
                      preference=1,  # Prefer hls since it allows to workaround georestriction
-                    m3u8_id='hls'))
+                    m3u8_id='hls', fatal=False)
+                if m3u8_formats is not False:
+                    formats.extend(m3u8_formats)
              else:
                  formats.append({
                      'url': endpoint,
diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py

index a77c6a2fc9f2838305145c97e9920d09635ceba7..5d09eb9a8b28cdd2f8bea0743d947f98415564a2 100644 (file)
--- a/youtube_dl/extractor/thesixtyone.py
+++ b/youtube_dl/extractor/thesixtyone.py
@@ -1,9 +1,6 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-import json
-import re
-
  from .common import InfoExtractor
  from ..utils import unified_strdate
  
@@ -17,7 +14,7 @@ class TheSixtyOneIE(InfoExtractor):
              song
          )/(?P<id>[A-Za-z0-9]+)/?$'''
      _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
-    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
+    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
      _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
      _TESTS = [
          {
@@ -70,14 +67,19 @@ class TheSixtyOneIE(InfoExtractor):
      }
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
+        song_id = self._match_id(url)
  
          webpage = self._download_webpage(
              self._SONG_URL_TEMPLATE.format(song_id), song_id)
  
-        song_data = json.loads(self._search_regex(
-            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
+        song_data = self._parse_json(self._search_regex(
+            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'), song_id)
+
+        if self._search_regex(r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage, 's3_audio_load marker', default=None):
+            song_data['audio_server'] = 's3.amazonaws.com'
+        else:
+            song_data['audio_server'] = song_data['audio_server'] + '.thesixtyone.com'
+
          keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
          url = self._SONG_FILE_URL_TEMPLATE.format(
              "".join(reversed(keys)), **song_data)
diff --git a/youtube_dl/extractor/thisamericanlife.py b/youtube_dl/extractor/thisamericanlife.py

new file mode 100644 (file)

index 0000000..36493a5
--- /dev/null
+++ b/youtube_dl/extractor/thisamericanlife.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisAmericanLifeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
+        'md5': '8f7d2da8926298fdfca2ee37764c11ce',
+        'info_dict': {
+            'id': '487',
+            'ext': 'm4a',
+            'title': '487: Harper High School, Part One',
+            'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id)
+
+        return {
+            'id': video_id,
+            'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id),
+            'protocol': 'm3u8_native',
+            'ext': 'm4a',
+            'acodec': 'aac',
+            'vcodec': 'none',
+            'abr': 64,
+            'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
+            'description': self._html_search_meta(r'description', webpage, 'description'),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py

index c282865b2517d8cbd62df6f2dee0540146baae48..49516abca690721a83dee5044bb2cdd6540d4a07 100644 (file)
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -3,39 +3,70 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..compat import compat_str
  from ..utils import (
-    parse_duration,
      fix_xml_ampersands,
+    float_or_none,
+    int_or_none,
+    parse_duration,
+    str_to_int,
+    xpath_text,
  )
  
  
-class TNAFlixIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
-
-    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
-    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
-    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
-
-    _TESTS = [
-        {
-            'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-            'md5': 'ecf3498417d09216374fc5907f9c6ec0',
-            'info_dict': {
-                'id': '553878',
-                'display_id': 'Carmella-Decesare-striptease',
-                'ext': 'mp4',
-                'title': 'Carmella Decesare - striptease',
-                'description': '',
-                'thumbnail': 're:https?://.*\.jpg$',
-                'duration': 91,
-                'age_limit': 18,
-            }
-        },
-        {
-            'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
-            'only_matching': True,
-        }
+class TNAFlixNetworkBaseIE(InfoExtractor):
+    # May be overridden in descendants if necessary
+    _CONFIG_REGEX = [
+        r'flashvars\.config\s*=\s*escape\("([^"]+)"',
+        r'<input[^>]+name="config\d?" value="([^"]+)"',
      ]
+    _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
+    _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
+    _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
+    _VIEW_COUNT_REGEX = None
+    _COMMENT_COUNT_REGEX = None
+    _AVERAGE_RATING_REGEX = None
+    _CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
+
+    def _extract_thumbnails(self, flix_xml):
+
+        def get_child(elem, names):
+            for name in names:
+                child = elem.find(name)
+                if child is not None:
+                    return child
+
+        timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
+        if timeline is None:
+            return
+
+        pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
+        if pattern_el is None or not pattern_el.text:
+            return
+
+        first_el = get_child(timeline, ['imageFirst', 'first'])
+        last_el = get_child(timeline, ['imageLast', 'last'])
+        if first_el is None or last_el is None:
+            return
+
+        first_text = first_el.text
+        last_text = last_el.text
+        if not first_text.isdigit() or not last_text.isdigit():
+            return
+
+        first = int(first_text)
+        last = int(last_text)
+        if first > last:
+            return
+
+        width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
+        height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))
+
+        return [{
+            'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
+            'width': width,
+            'height': height,
+        } for i in range(first, last + 1)]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -44,47 +75,195 @@ class TNAFlixIE(InfoExtractor):
  
          webpage = self._download_webpage(url, display_id)
  
-        title = self._html_search_regex(
-            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
-        description = self._html_search_regex(
-            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
-
-        age_limit = self._rta_search(webpage)
-
-        duration = parse_duration(self._html_search_meta(
-            'duration', webpage, 'duration', default=None))
-
          cfg_url = self._proto_relative_url(self._html_search_regex(
              self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
  
          cfg_xml = self._download_xml(
-            cfg_url, display_id, note='Downloading metadata',
+            cfg_url, display_id, 'Downloading metadata',
              transform_source=fix_xml_ampersands)
  
-        thumbnail = self._proto_relative_url(
-            cfg_xml.find('./startThumb').text, 'http:')
-
          formats = []
+
+        def extract_video_url(vl):
+            return re.sub('speed=\d+', 'speed=', vl.text)
+
+        video_link = cfg_xml.find('./videoLink')
+        if video_link is not None:
+            formats.append({
+                'url': extract_video_url(video_link),
+                'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
+            })
+
          for item in cfg_xml.findall('./quality/item'):
-            video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
-            format_id = item.find('res').text
-            fmt = {
-                'url': self._proto_relative_url(video_url, 'http:'),
+            video_link = item.find('./videoLink')
+            if video_link is None:
+                continue
+            res = item.find('res')
+            format_id = None if res is None else res.text
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]', format_id, 'height', default=None))
+            formats.append({
+                'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
                  'format_id': format_id,
-            }
-            m = re.search(r'^(\d+)', format_id)
-            if m:
-                fmt['height'] = int(m.group(1))
-            formats.append(fmt)
+                'height': height,
+            })
+
          self._sort_formats(formats)
  
+        thumbnail = self._proto_relative_url(
+            xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
+        thumbnails = self._extract_thumbnails(cfg_xml)
+
+        title = self._html_search_regex(
+            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
+
+        age_limit = self._rta_search(webpage)
+
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', default=None))
+
+        def extract_field(pattern, name):
+            return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
+
+        description = extract_field(self._DESCRIPTION_REGEX, 'description')
+        uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
+        view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
+        comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
+        average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
+
+        categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
+        categories = categories_str.split(', ') if categories_str is not None else []
+
          return {
              'id': video_id,
              'display_id': display_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
              'duration': duration,
              'age_limit': age_limit,
+            'uploader': uploader,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'average_rating': average_rating,
+            'categories': categories,
              'formats': formats,
          }
+
+
+class TNAFlixIE(TNAFlixNetworkBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
+
+    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
+    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
+    _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'
+
+    _TESTS = [{
+        # anonymous uploader, no categories
+        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'info_dict': {
+            'id': '553878',
+            'display_id': 'Carmella-Decesare-striptease',
+            'ext': 'mp4',
+            'title': 'Carmella Decesare - striptease',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 91,
+            'age_limit': 18,
+            'uploader': 'Anonymous',
+            'categories': [],
+        }
+    }, {
+        # non-anonymous uploader, categories
+        'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
+        'md5': '0f5d4d490dbfd117b8607054248a07c0',
+        'info_dict': {
+            'id': '6538',
+            'display_id': 'Educational-xxx-video',
+            'ext': 'mp4',
+            'title': 'Educational xxx video',
+            'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 164,
+            'age_limit': 18,
+            'uploader': 'bobwhite39',
+            'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
+        }
+    }, {
+        'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+        'only_matching': True,
+    }]
+
+
+class EMPFlixIE(TNAFlixNetworkBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
+
+    _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'
+
+    _TESTS = [{
+        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+        'info_dict': {
+            'id': '33051',
+            'display_id': 'Amateur-Finger-Fuck',
+            'ext': 'mp4',
+            'title': 'Amateur Finger Fuck',
+            'description': 'Amateur solo finger fucking.',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 83,
+            'age_limit': 18,
+            'uploader': 'cwbike',
+            'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
+        }
+    }, {
+        'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+        'only_matching': True,
+    }]
+
+
+class MovieFapIE(TNAFlixNetworkBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
+
+    _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
+    _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
+    _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
+    _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'
+
+    _TESTS = [{
+        # normal, multi-format video
+        'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
+        'md5': '26624b4e2523051b550067d547615906',
+        'info_dict': {
+            'id': 'be9867c9416c19f54a4a',
+            'display_id': 'experienced-milf-amazing-handjob',
+            'ext': 'mp4',
+            'title': 'Experienced MILF Amazing Handjob',
+            'description': 'Experienced MILF giving an Amazing Handjob',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+            'uploader': 'darvinfred06',
+            'view_count': int,
+            'comment_count': int,
+            'average_rating': float,
+            'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
+        }
+    }, {
+        # quirky single-format case where the extension is given as fid, but the video is really an flv
+        'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
+        'md5': 'fa56683e291fc80635907168a743c9ad',
+        'info_dict': {
+            'id': 'e5da0d3edce5404418f5',
+            'display_id': 'jeune-couple-russe',
+            'ext': 'flv',
+            'title': 'Jeune Couple Russe',
+            'description': 'Amateur',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+            'uploader': 'whiskeyjar',
+            'view_count': int,
+            'comment_count': int,
+            'average_rating': float,
+            'categories': ['Amateur', 'Teen'],
+        }
+    }]
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py

index 94bd6345da18815a50b72502a8b91ae4e30ae2b5..92b6dc1b820eef8e6a7a0b42bd48bfc5d2422e38 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -22,8 +22,8 @@ class TwitchBaseIE(InfoExtractor):
  
      _API_BASE = 'https://api.twitch.tv'
      _USHER_BASE = 'http://usher.twitch.tv'
-    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
-    _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
+    _LOGIN_URL = 'https://secure.twitch.tv/login'
+    _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
      _NETRC_MACHINE = 'twitch'
  
      def _handle_error(self, response):
@@ -59,20 +59,12 @@ class TwitchBaseIE(InfoExtractor):
          login_page = self._download_webpage(
              self._LOGIN_URL, None, 'Downloading login page')
  
-        authenticity_token = self._search_regex(
-            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
-            login_page, 'authenticity token')
-
-        login_form = {
-            'utf8': '✓'.encode('utf-8'),
-            'authenticity_token': authenticity_token,
-            'redirect_on_login': '',
-            'embed_form': 'false',
-            'mp_source_action': 'login-button',
-            'follow': '',
-            'login': username,
-            'password': password,
-        }
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'login': username.encode('utf-8'),
+            'password': password.encode('utf-8'),
+        })
  
          request = compat_urllib_request.Request(
              self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
@@ -80,11 +72,15 @@ class TwitchBaseIE(InfoExtractor):
          response = self._download_webpage(
              request, None, 'Logging in as %s' % username)
  
-        m = re.search(
-            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
-        if m:
+        error_message = self._search_regex(
+            r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
+            response, 'error message', default=None)
+        if error_message:
              raise ExtractorError(
-                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+                'Unable to login. Twitch said: %s' % error_message, expected=True)
+
+        if '>Reset your password<' in response:
+            self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
  
      def _prefer_source(self, formats):
          try:
@@ -189,17 +185,17 @@ class TwitchVodIE(TwitchItemBaseIE):
      _ITEM_SHORTCUT = 'v'
  
      _TEST = {
-        'url': 'http://www.twitch.tv/ksptv/v/3622000',
+        'url': 'http://www.twitch.tv/riotgames/v/6528877',
          'info_dict': {
-            'id': 'v3622000',
+            'id': 'v6528877',
              'ext': 'mp4',
-            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+            'title': 'LCK Summer Split - Week 6 Day 1',
              'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 6951,
-            'timestamp': 1419028564,
-            'upload_date': '20141219',
-            'uploader': 'KSPTV',
-            'uploader_id': 'ksptv',
+            'duration': 17208,
+            'timestamp': 1435131709,
+            'upload_date': '20150624',
+            'uploader': 'Riot Games',
+            'uploader_id': 'riotgames',
              'view_count': int,
          },
          'params': {
@@ -215,7 +211,7 @@ class TwitchVodIE(TwitchItemBaseIE):
              '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
              'Downloading %s access token' % self._ITEM_TYPE)
          formats = self._extract_m3u8_formats(
-            '%s/vod/%s?nauth=%s&nauthsig=%s'
+            '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
              % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
              item_id, 'mp4')
          self._prefer_source(formats)
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py

new file mode 100644 (file)

index 0000000..1aaa063
--- /dev/null
+++ b/youtube_dl/extractor/twitter.py
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+    float_or_none,
+    unescapeHTML,
+)
+
+
+class TwitterCardIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+        'md5': 'a74f50b310c83170319ba16de6955192',
+        'info_dict': {
+            'id': '560070183650213889',
+            'ext': 'mp4',
+            'title': 'TwitterCard',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 30.033,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # Different formats served for different User-Agents
+        USER_AGENTS = [
+            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4
+            'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm
+        ]
+
+        config = None
+        formats = []
+        for user_agent in USER_AGENTS:
+            request = compat_urllib_request.Request(url)
+            request.add_header('User-Agent', user_agent)
+            webpage = self._download_webpage(request, video_id)
+
+            config = self._parse_json(
+                unescapeHTML(self._search_regex(
+                    r'data-player-config="([^"]+)"', webpage, 'data player config')),
+                video_id)
+
+            video_url = config['playlist'][0]['source']
+
+            f = {
+                'url': video_url,
+            }
+
+            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+            if m:
+                f.update({
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+            formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = config.get('posterImageUrl')
+        duration = float_or_none(config.get('duration'))
+
+        return {
+            'id': video_id,
+            'title': 'TwitterCard',
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py

index 4667ed83b71f4aec5f081741834e2c9cca010e82..e2bab52fef3451596ec1cf0de19e3131e378b5dd 100644 (file)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -15,7 +15,8 @@ from ..utils import (
  class UdemyIE(InfoExtractor):
      IE_NAME = 'udemy'
      _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
-    _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
+    _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+    _ORIGIN_URL = 'https://www.udemy.com'
      _NETRC_MACHINE = 'udemy'
  
      _TESTS = [{
@@ -74,29 +75,33 @@ class UdemyIE(InfoExtractor):
                  expected=True)
  
          login_popup = self._download_webpage(
-            'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
-            'Downloading login popup')
+            self._LOGIN_URL, None, 'Downloading login popup')
  
          if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
              return
  
-        csrf = self._html_search_regex(
-            r'<input type="hidden" name="csrf" value="(.+?)"',
-            login_popup, 'csrf token')
+        login_form = self._form_hidden_inputs('login-form', login_popup)
+
+        login_form.update({
+            'email': username.encode('utf-8'),
+            'password': password.encode('utf-8'),
+        })
  
-        login_form = {
-            'email': username,
-            'password': password,
-            'csrf': csrf,
-            'displayType': 'json',
-            'isSubmitted': '1',
-        }
          request = compat_urllib_request.Request(
              self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        response = self._download_json(
+        request.add_header('Referer', self._ORIGIN_URL)
+        request.add_header('Origin', self._ORIGIN_URL)
+
+        response = self._download_webpage(
              request, None, 'Logging in as %s' % username)
  
-        if 'returnUrl' not in response:
+        if all(logout_pattern not in response
+               for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
+            error = self._html_search_regex(
+                r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+                response, 'error message', default=None)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
              raise ExtractorError('Unable to log in')
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py

index c08428acfab446dff6157035ef032ae326199ebf..2151f83382d6b3185722b54de2d0eab2a988c6ae 100644 (file)
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -11,6 +11,7 @@ from ..compat import compat_urlparse
  
  
  class UDNEmbedIE(InfoExtractor):
+    IE_DESC = '聯合影音'
      _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
      _TESTS = [{
          'url': 'http://video.udn.com/embed/news/300040',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index cae90205d30b0917e359df6281646eef64b82def..10d6745af703e00d6962d3e14c8b01f2419ad955 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -452,11 +452,7 @@ class VimeoChannelIE(InfoExtractor):
          password = self._downloader.params.get('videopassword', None)
          if password is None:
              raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            value="([^"]*)"
-            ''', login_form))
+        fields = self._hidden_inputs(login_form)
          token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
          fields['token'] = token
          fields['password'] = password
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py

index aa3d6ddfd2420524fd87f85819d2611225224e79..92321d66e369626c0adfeda6cb4fae282a6f7abb 100644 (file)
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -4,7 +4,29 @@ from .common import InfoExtractor
  from ..utils import int_or_none
  
  
-class VimpleIE(InfoExtractor):
+class SprutoBaseIE(InfoExtractor):
+    def _extract_spruto(self, spruto, video_id):
+        playlist = spruto['playlist'][0]
+        title = playlist['title']
+        video_id = playlist.get('videoId') or video_id
+        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+        duration = int_or_none(playlist.get('duration'))
+
+        formats = [{
+            'url': f['url'],
+        } for f in playlist['video']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class VimpleIE(SprutoBaseIE):
      IE_DESC = 'Vimple - one-click video hosting'
      _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
      _TESTS = [
@@ -30,25 +52,9 @@ class VimpleIE(InfoExtractor):
          webpage = self._download_webpage(
              'http://player.vimple.ru/iframe/%s' % video_id, video_id)
  
-        playlist = self._parse_json(
+        spruto = self._parse_json(
              self._search_regex(
                  r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
-            video_id)['playlist'][0]
-
-        title = playlist['title']
-        video_id = playlist.get('videoId') or video_id
-        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
-        duration = int_or_none(playlist.get('duration'))
-
-        formats = [{
-            'url': f['url'],
-        } for f in playlist['video']]
-        self._sort_formats(formats)
+            video_id)
  
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-        }
+        return self._extract_spruto(spruto, video_id)
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index 38ff3c1a949c0511d08518b77180be653defce62..8f677cae3a503ef34230cf205c12378e3c2ecb66 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -21,7 +21,17 @@ from ..utils import (
  
  class VKIE(InfoExtractor):
      IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|
+                            (?:
+                                (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
+                                (?:www\.)?biqle\.ru/watch/
+                            )
+                            (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
+                        )
+                    '''
      _NETRC_MACHINE = 'vk'
  
      _TESTS = [
@@ -109,11 +119,45 @@ class VKIE(InfoExtractor):
              },
              'skip': 'Only works from Russia',
          },
+        {
+            # video (removed?) only available with list id
+            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+            'md5': '091287af5402239a1051c37ec7b92913',
+            'info_dict': {
+                'id': '171201961',
+                'ext': 'mp4',
+                'title': 'ТюменцевВВ_09.07.2015',
+                'uploader': 'Anton Ivanov',
+                'duration': 109,
+                'upload_date': '20150709',
+                'view_count': int,
+            },
+        },
+        {
+            # youtube embed
+            'url': 'https://vk.com/video276849682_170681728',
+            'info_dict': {
+                'id': 'V3K4mi0SYkc',
+                'ext': 'mp4',
+                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+                'duration': 179,
+                'upload_date': '20130116',
+                'uploader': "Children's Joy Foundation",
+                'uploader_id': 'thecjf',
+                'view_count': int,
+            },
+        },
          {
              # removed video, just testing that we match the pattern
              'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
              'only_matching': True,
          },
+        {
+            # vk wrapper
+            'url': 'http://www.biqle.ru/watch/847655_160197695',
+            'only_matching': True,
+        }
      ]
  
      def _login(self):
@@ -121,20 +165,25 @@ class VKIE(InfoExtractor):
          if username is None:
              return
  
-        login_form = {
-            'act': 'login',
-            'role': 'al_frame',
-            'expire': '1',
+        login_page = self._download_webpage(
+            'https://vk.com', None, 'Downloading login page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
              'email': username.encode('cp1251'),
              'pass': password.encode('cp1251'),
-        }
+        })
  
-        request = compat_urllib_request.Request('https://login.vk.com/?act=login',
-                                                compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+        request = compat_urllib_request.Request(
+            'https://login.vk.com/?act=login',
+            compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        login_page = self._download_webpage(
+            request, None, note='Logging in as %s' % username)
  
          if re.search(r'onLoginFailed', login_page):
-            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+            raise ExtractorError(
+                'Unable to login, incorrect username and/or password', expected=True)
  
      def _real_initialize(self):
          self._login()
@@ -146,9 +195,20 @@ class VKIE(InfoExtractor):
          if not video_id:
              video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
  
-        info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+        info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+
+        # Some videos (removed?) can only be downloaded with list id specified
+        list_id = mobj.group('list_id')
+        if list_id:
+            info_url += '&list=%s' % list_id
+
          info_page = self._download_webpage(info_url, video_id)
  
+        if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+            raise ExtractorError(
+                'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+                expected=True)
+
          ERRORS = {
              r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
              'Video %s has been removed from public access due to rightholder complaint.',
@@ -162,16 +222,20 @@ class VKIE(InfoExtractor):
  
              r'<!>Видео временно недоступно':
              'Video %s is temporarily unavailable.',
+
+            r'<!>Access denied':
+            'Access denied to video %s.',
          }
  
          for error_re, error_msg in ERRORS.items():
              if re.search(error_re, info_page):
                  raise ExtractorError(error_msg % video_id, expected=True)
  
-        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
-        if m_yt is not None:
-            self.to_screen('Youtube video detected')
-            return self.url_result(m_yt.group(1), 'Youtube')
+        youtube_url = self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+            info_page, 'youtube iframe', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, 'Youtube')
  
          m_rutube = re.search(
              r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py

index 1c0966a793511a2ec3a9d147bd75ff22e8fb7209..ccf1928b5d323f277b4e8a47bd4d008e821b147c 100644 (file)
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -1,8 +1,6 @@
  # -*- coding: utf-8 -*-
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
@@ -28,12 +26,7 @@ class VodlockerIE(InfoExtractor):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            (?:id="[^"]+"\s+)?
-            value="([^"]*)"
-            ''', webpage))
+        fields = self._hidden_inputs(webpage)
  
          if fields['op'] == 'download1':
              self._sleep(3, video_id)  # they do detect when requests happen too fast!
diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py

index 405cb9db49f41a144a4c842d8f99aeb1c2023da9..149e364677fcab4d0374479c4b96ff741277b17e 100644 (file)
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@@ -36,6 +36,7 @@ class VubeIE(InfoExtractor):
                  'comment_count': int,
                  'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
              },
+            'skip': 'Not accessible from Travis CI server',
          }, {
              'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
              'md5': 'db7aba89d4603dadd627e9d1973946fe',
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py

index 73077a312549f6b883fdf549a2b364f6de35db9f..2037d9b3d57cd5876d85e9552ffcc9f387fcc975 100644 (file)
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -1,6 +1,8 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..utils import int_or_none
  
@@ -98,3 +100,42 @@ class WebOfStoriesIE(InfoExtractor):
              'description': description,
              'duration': duration,
          }
+
+
+class WebOfStoriesPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.webofstories.com/playAll/donald.knuth',
+        'info_dict': {
+            'id': 'donald.knuth',
+            'title': 'Donald Knuth (Scientist)',
+        },
+        'playlist_mincount': 97,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
+            for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+        ]
+
+        title = self._search_regex(
+            r'<div id="speakerName">\s*<span>([^<]+)</span>',
+            webpage, 'speaker', default=None)
+        if title:
+            field = self._search_regex(
+                r'<span id="primaryField">([^<]+)</span>',
+                webpage, 'field', default=None)
+            if field:
+                title += ' (%s)' % field
+
+        if not title:
+            title = self._search_regex(
+                r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
+                webpage, 'title')
+
+        return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py

index 81d885fdcee1cf788c217e862629df58f386d73c..5aac8adb36e2ad12e798cb4f0c77e5b204c7b91b 100644 (file)
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -13,6 +13,7 @@ from ..utils import (
  
  
  class XuiteIE(InfoExtractor):
+    IE_DESC = '隨意窩Xuite影音'
      _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
      _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
      _TESTS = [{
diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py

index 9d851bae3b779d8ce434742d6f5ef70d3f423936..001ee17b6f93d457bdc2fbdaf802b61ef19e1b41 100644 (file)
--- a/youtube_dl/extractor/yam.py
+++ b/youtube_dl/extractor/yam.py
@@ -14,6 +14,7 @@ from ..utils import (
  
  
  class YamIE(InfoExtractor):
+    IE_DESC = '蕃薯藤yam天空部落'
      _VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)'
  
      _TESTS = [{
diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dl/extractor/yinyuetai.py

new file mode 100644 (file)

index 0000000..834d860
--- /dev/null
+++ b/youtube_dl/extractor/yinyuetai.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class YinYueTaiIE(InfoExtractor):
+    IE_NAME = 'yinyuetai:video'
+    IE_DESC = '音悦Tai'
+    _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://v.yinyuetai.com/video/2322376',
+        'md5': '6e3abe28d38e3a54b591f9f040595ce0',
+        'info_dict': {
+            'id': '2322376',
+            'ext': 'mp4',
+            'title': '少女时代_PARTY_Music Video Teaser',
+            'creator': '少女时代',
+            'duration': 25,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://v.yinyuetai.com/video/h5/2322376',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
+            'Downloading mv info')['videoInfo']['coreVideoInfo']
+
+        if info['error']:
+            raise ExtractorError(info['errorMsg'], expected=True)
+
+        formats = [{
+            'url': format_info['videoUrl'],
+            'format_id': format_info['qualityLevel'],
+            'format': format_info.get('qualityLevelName'),
+            'filesize': format_info.get('fileSize'),
+            # though URLs ends with .flv, the downloaded files are in fact mp4
+            'ext': 'mp4',
+            'tbr': format_info.get('bitrate'),
+        } for format_info in info['videoUrlModels']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['videoName'],
+            'thumbnail': info.get('bigHeadImage'),
+            'creator': info.get('artistNames'),
+            'duration': info.get('duration'),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py

index ced3a10cd417840796d4240cfd1d8163f3148134..78caeb8b36e0be8cf4e97365d9e28251723059b7 100644 (file)
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -15,6 +15,7 @@ from ..compat import (
  
  class YoukuIE(InfoExtractor):
      IE_NAME = 'youku'
+    IE_DESC = '优酷'
      _VALID_URL = r'''(?x)
          (?:
              http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index a3da56c1413494ffd73d523a042959af16956ff0..3c629d38a1c7cea5f0b45d600c41f9d9ff658873 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -29,9 +29,11 @@ from ..utils import (
      get_element_by_id,
      int_or_none,
      orderedSet,
+    str_to_int,
      unescapeHTML,
      unified_strdate,
      uppercase_escape,
+    ISO3166Utils,
  )
  
  
@@ -518,6 +520,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'skip_download': 'requires avconv',
              }
          },
+        # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+        {
+            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+            'info_dict': {
+                'id': 'FIl7x6_3R5Y',
+                'ext': 'mp4',
+                'title': 'md5:7b81415841e02ecd4313668cde88737a',
+                'description': 'md5:116377fd2963b81ec4ce64b542173306',
+                'upload_date': '20150625',
+                'uploader_id': 'dorappi2000',
+                'uploader': 'dorappi2000',
+                'formats': 'mincount:33',
+            },
+        }
      ]
  
      def __init__(self, *args, **kwargs):
@@ -782,7 +798,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
  
      def _parse_dash_manifest(
-            self, video_id, dash_manifest_url, player_url, age_gate):
+            self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
          def decrypt_sig(mobj):
              s = mobj.group(1)
              dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
@@ -791,7 +807,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          dash_doc = self._download_xml(
              dash_manifest_url, video_id,
              note='Downloading DASH manifest',
-            errnote='Could not download DASH manifest')
+            errnote='Could not download DASH manifest',
+            fatal=fatal)
+
+        if dash_doc is False:
+            return []
  
          formats = []
          for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
@@ -824,6 +844,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      except StopIteration:
                          full_info = self._formats.get(format_id, {}).copy()
                          full_info.update(f)
+                        codecs = r.attrib.get('codecs')
+                        if codecs:
+                            if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
+                                full_info['vcodec'] = codecs
+                            elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
+                                full_info['acodec'] = codecs
                          formats.append(full_info)
                      else:
                          existing_format.update(f)
@@ -853,6 +879,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          else:
              player_url = None
  
+        dash_mpds = []
+
+        def add_dash_mpd(video_info):
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd and dash_mpd[0] not in dash_mpds:
+                dash_mpds.append(dash_mpd[0])
+
          # Get video info
          embed_webpage = None
          if re.search(r'player-age-gate-content">', video_webpage) is not None:
@@ -873,24 +906,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  note='Refetching age-gated info webpage',
                  errnote='unable to download video info webpage')
              video_info = compat_parse_qs(video_info_webpage)
+            add_dash_mpd(video_info)
          else:
              age_gate = False
-            try:
-                # Try looking directly into the video webpage
-                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-                if not mobj:
-                    raise ValueError('Could not find ytplayer.config')  # caught below
+            video_info = None
+            # Try looking directly into the video webpage
+            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+            if mobj:
                  json_code = uppercase_escape(mobj.group(1))
                  ytplayer_config = json.loads(json_code)
                  args = ytplayer_config['args']
-                # Convert to the same format returned by compat_parse_qs
-                video_info = dict((k, [v]) for k, v in args.items())
-                if not args.get('url_encoded_fmt_stream_map'):
-                    raise ValueError('No stream_map present')  # caught below
-            except ValueError:
-                # We fallback to the get_video_info pages (used by the embed page)
+                if args.get('url_encoded_fmt_stream_map'):
+                    # Convert to the same format returned by compat_parse_qs
+                    video_info = dict((k, [v]) for k, v in args.items())
+                    add_dash_mpd(video_info)
+            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+                # We also try looking in get_video_info since it may contain different dashmpd
+                # URL that points to a DASH manifest with possibly different itag set (some itags
+                # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
+                # manifest pointed by get_video_info's dashmpd).
+                # The general idea is to take a union of itags of both DASH manifests (for example
+                # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
                  self.report_video_info_webpage_download(video_id)
-                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                      video_info_url = (
                          '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                          % (proto, video_id, el_type))
@@ -898,11 +936,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          video_info_url,
                          video_id, note=False,
                          errnote='unable to download video info webpage')
-                    video_info = compat_parse_qs(video_info_webpage)
-                    if 'token' in video_info:
+                    get_video_info = compat_parse_qs(video_info_webpage)
+                    add_dash_mpd(get_video_info)
+                    if not video_info:
+                        video_info = get_video_info
+                    if 'token' in get_video_info:
                          break
          if 'token' not in video_info:
              if 'reason' in video_info:
+                if 'The uploader has not made this video available in your country.' in video_info['reason']:
+                    regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
+                    if regions_allowed is not None:
+                        raise ExtractorError('YouTube said: This video is available in %s only' % (
+                            ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
+                            expected=True)
                  raise ExtractorError(
                      'YouTube said: %s' % video_info['reason'][0],
                      expected=True, video_id=video_id)
@@ -956,15 +1003,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
  
          # upload date
-        upload_date = None
-        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
-        if mobj is None:
-            mobj = re.search(
-                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
-                video_webpage)
-        if mobj is not None:
-            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-            upload_date = unified_strdate(upload_date)
+        upload_date = self._html_search_meta(
+            'datePublished', video_webpage, 'upload date', default=None)
+        if not upload_date:
+            upload_date = self._search_regex(
+                [r'(?s)id="eow-date.*?>(.*?)</span>',
+                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+                video_webpage, 'upload date', default=None)
+            if upload_date:
+                upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
+        upload_date = unified_strdate(upload_date)
  
          m_cat_container = self._search_regex(
              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
@@ -998,12 +1046,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  video_description = ''
  
          def _extract_count(count_name):
-            count = self._search_regex(
-                r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
-                video_webpage, count_name, default=None)
-            if count is not None:
-                return int(count.replace(',', ''))
-            return None
+            return str_to_int(self._search_regex(
+                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+                % re.escape(count_name),
+                video_webpage, count_name, default=None))
+
          like_count = _extract_count('like')
          dislike_count = _extract_count('dislike')
  
@@ -1118,24 +1165,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
          # Look for the DASH manifest
          if self._downloader.params.get('youtube_include_dash_manifest', True):
-            dash_mpd = video_info.get('dashmpd')
-            if dash_mpd:
-                dash_manifest_url = dash_mpd[0]
+            dash_mpd_fatal = True
+            for dash_manifest_url in dash_mpds:
+                dash_formats = {}
                  try:
-                    dash_formats = self._parse_dash_manifest(
-                        video_id, dash_manifest_url, player_url, age_gate)
+                    for df in self._parse_dash_manifest(
+                            video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
+                        # Do not overwrite DASH format found in some previous DASH manifest
+                        if df['format_id'] not in dash_formats:
+                            dash_formats[df['format_id']] = df
+                        # Additional DASH manifests may end up in HTTP Error 403 therefore
+                        # allow them to fail without bug report message if we already have
+                        # some DASH manifest succeeded. This is temporary workaround to reduce
+                        # burst of bug reports until we figure out the reason and whether it
+                        # can be fixed at all.
+                        dash_mpd_fatal = False
                  except (ExtractorError, KeyError) as e:
                      self.report_warning(
                          'Skipping DASH manifest: %r' % e, video_id)
-                else:
+                if dash_formats:
                      # Remove the formats we found through non-DASH, they
                      # contain less info and it can be wrong, because we use
                      # fixed values (for example the resolution). See
                      # https://github.com/rg3/youtube-dl/issues/5774 for an
                      # example.
-                    dash_keys = set(df['format_id'] for df in dash_formats)
-                    formats = [f for f in formats if f['format_id'] not in dash_keys]
-                    formats.extend(dash_formats)
+                    formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
+                    formats.extend(dash_formats.values())
  
          # Check for malformed aspect ratio
          stretched_m = re.search(
diff --git a/youtube_dl/options.py b/youtube_dl/options.py

index 740458e51483f45f8d8474d68edaaac48b24941e..85365d769ae8febd697b778c11cd1e2a4bc0acb6 100644 (file)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -150,6 +150,10 @@ def parseOpts(overrideArguments=None):
          '--extractor-descriptions',
          action='store_true', dest='list_extractor_descriptions', default=False,
          help='Output descriptions of all supported extractors')
+    general.add_option(
+        '--force-generic-extractor',
+        action='store_true', dest='force_generic_extractor', default=False,
+        help='Force extraction to use the generic extractor')
      general.add_option(
          '--default-search',
          dest='default_search', metavar='PREFIX',
@@ -342,12 +346,13 @@ def parseOpts(overrideArguments=None):
      video_format.add_option(
          '--youtube-skip-dash-manifest',
          action='store_false', dest='youtube_include_dash_manifest',
-        help='Do not download the DASH manifest on YouTube videos')
+        help='Do not download the DASH manifests and related data on YouTube videos')
      video_format.add_option(
          '--merge-output-format',
          action='store', dest='merge_output_format', metavar='FORMAT', default=None,
          help=(
-            'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
+            'If a merge is required (e.g. bestvideo+bestaudio), '
+            'output to given container format. One of mkv, mp4, ogg, webm, flv. '
              'Ignored if no merge is required'))
  
      subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
@@ -686,7 +691,11 @@ def parseOpts(overrideArguments=None):
      postproc.add_option(
          '--recode-video',
          metavar='FORMAT', dest='recodevideo', default=None,
-        help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
+        help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+    postproc.add_option(
+        '--postprocessor-args',
+        dest='postprocessor_args', metavar='ARGS',
+        help='Give these arguments to the postprocessor')
      postproc.add_option(
          '-k', '--keep-video',
          action='store_true', dest='keepvideo', default=False,
diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py

index 3b0e8ddd8bfed92f2e9043b0adea2655a9e5f67b..4191d040bb1da468e248d57b78df1afdacf64cd0 100644 (file)
--- a/youtube_dl/postprocessor/common.py
+++ b/youtube_dl/postprocessor/common.py
@@ -23,6 +23,9 @@ class PostProcessor(object):
  
      PostProcessor objects follow a "mutual registration" process similar
      to InfoExtractor objects.
+
+    Optionally PostProcessor can use a list of additional command-line arguments
+    with self._configuration_args.
      """
  
      _downloader = None
@@ -57,6 +60,13 @@ class PostProcessor(object):
          except Exception:
              self._downloader.report_warning(errnote)
  
+    def _configuration_args(self, default=[]):
+        pp_args = self._downloader.params.get('postprocessor_args')
+        if pp_args is None:
+            return default
+        assert isinstance(pp_args, list)
+        return pp_args
+
  
  class AudioConversionError(PostProcessingError):
      pass
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py

index fe7e0a8eea6ad81c8700f0a1d5415f7cccf1d756..1f723908be8d4ff0247affc5aed9ffc44e777602 100644 (file)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -131,6 +131,8 @@ class FFmpegPostProcessor(PostProcessor):
          oldest_mtime = min(
              os.stat(encodeFilename(path)).st_mtime for path in input_paths)
  
+        opts += self._configuration_args()
+
          files_cmd = []
          for path in input_paths:
              files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
@@ -263,7 +265,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
          # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
          if (new_path == path or
                  (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
-            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
              return [], information
  
          try:
@@ -294,13 +296,16 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
  
      def run(self, information):
          path = information['filepath']
-        prefix, sep, ext = path.rpartition('.')
-        outpath = prefix + sep + self._preferedformat
          if information['ext'] == self._preferedformat:
              self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
              return [], information
+        options = []
+        if self._preferedformat == 'avi':
+            options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
+        prefix, sep, ext = path.rpartition('.')
+        outpath = prefix + sep + self._preferedformat
          self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
-        self.run_ffmpeg(path, outpath, [])
+        self.run_ffmpeg(path, outpath, options)
          information['filepath'] = outpath
          information['format'] = self._preferedformat
          information['ext'] = self._preferedformat
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index a2746b2d1f0106ab9d9e651250a070df34af2d11..942f76d2452c06a261d75e03cebc999fff02874c 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -62,6 +62,8 @@ std_headers = {
  }
  
  
+NO_DEFAULT = object()
+
  ENGLISH_MONTH_NAMES = [
      'January', 'February', 'March', 'April', 'May', 'June',
      'July', 'August', 'September', 'October', 'November', 'December']
@@ -171,13 +173,15 @@ def xpath_with_ns(path, ns_map):
      return '/'.join(replaced)
  
  
-def xpath_text(node, xpath, name=None, fatal=False):
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
      if sys.version_info < (2, 7):  # Crazy 2.6
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
      if n is None or n.text is None:
-        if fatal:
+        if default is not NO_DEFAULT:
+            return default
+        elif fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
          else:
@@ -2084,6 +2088,266 @@ class ISO639Utils(object):
                  return short_name
  
  
+class ISO3166Utils(object):
+    # From http://data.okfn.org/data/core/country-list
+    _country_map = {
+        'AF': 'Afghanistan',
+        'AX': 'Åland Islands',
+        'AL': 'Albania',
+        'DZ': 'Algeria',
+        'AS': 'American Samoa',
+        'AD': 'Andorra',
+        'AO': 'Angola',
+        'AI': 'Anguilla',
+        'AQ': 'Antarctica',
+        'AG': 'Antigua and Barbuda',
+        'AR': 'Argentina',
+        'AM': 'Armenia',
+        'AW': 'Aruba',
+        'AU': 'Australia',
+        'AT': 'Austria',
+        'AZ': 'Azerbaijan',
+        'BS': 'Bahamas',
+        'BH': 'Bahrain',
+        'BD': 'Bangladesh',
+        'BB': 'Barbados',
+        'BY': 'Belarus',
+        'BE': 'Belgium',
+        'BZ': 'Belize',
+        'BJ': 'Benin',
+        'BM': 'Bermuda',
+        'BT': 'Bhutan',
+        'BO': 'Bolivia, Plurinational State of',
+        'BQ': 'Bonaire, Sint Eustatius and Saba',
+        'BA': 'Bosnia and Herzegovina',
+        'BW': 'Botswana',
+        'BV': 'Bouvet Island',
+        'BR': 'Brazil',
+        'IO': 'British Indian Ocean Territory',
+        'BN': 'Brunei Darussalam',
+        'BG': 'Bulgaria',
+        'BF': 'Burkina Faso',
+        'BI': 'Burundi',
+        'KH': 'Cambodia',
+        'CM': 'Cameroon',
+        'CA': 'Canada',
+        'CV': 'Cape Verde',
+        'KY': 'Cayman Islands',
+        'CF': 'Central African Republic',
+        'TD': 'Chad',
+        'CL': 'Chile',
+        'CN': 'China',
+        'CX': 'Christmas Island',
+        'CC': 'Cocos (Keeling) Islands',
+        'CO': 'Colombia',
+        'KM': 'Comoros',
+        'CG': 'Congo',
+        'CD': 'Congo, the Democratic Republic of the',
+        'CK': 'Cook Islands',
+        'CR': 'Costa Rica',
+        'CI': 'Côte d\'Ivoire',
+        'HR': 'Croatia',
+        'CU': 'Cuba',
+        'CW': 'Curaçao',
+        'CY': 'Cyprus',
+        'CZ': 'Czech Republic',
+        'DK': 'Denmark',
+        'DJ': 'Djibouti',
+        'DM': 'Dominica',
+        'DO': 'Dominican Republic',
+        'EC': 'Ecuador',
+        'EG': 'Egypt',
+        'SV': 'El Salvador',
+        'GQ': 'Equatorial Guinea',
+        'ER': 'Eritrea',
+        'EE': 'Estonia',
+        'ET': 'Ethiopia',
+        'FK': 'Falkland Islands (Malvinas)',
+        'FO': 'Faroe Islands',
+        'FJ': 'Fiji',
+        'FI': 'Finland',
+        'FR': 'France',
+        'GF': 'French Guiana',
+        'PF': 'French Polynesia',
+        'TF': 'French Southern Territories',
+        'GA': 'Gabon',
+        'GM': 'Gambia',
+        'GE': 'Georgia',
+        'DE': 'Germany',
+        'GH': 'Ghana',
+        'GI': 'Gibraltar',
+        'GR': 'Greece',
+        'GL': 'Greenland',
+        'GD': 'Grenada',
+        'GP': 'Guadeloupe',
+        'GU': 'Guam',
+        'GT': 'Guatemala',
+        'GG': 'Guernsey',
+        'GN': 'Guinea',
+        'GW': 'Guinea-Bissau',
+        'GY': 'Guyana',
+        'HT': 'Haiti',
+        'HM': 'Heard Island and McDonald Islands',
+        'VA': 'Holy See (Vatican City State)',
+        'HN': 'Honduras',
+        'HK': 'Hong Kong',
+        'HU': 'Hungary',
+        'IS': 'Iceland',
+        'IN': 'India',
+        'ID': 'Indonesia',
+        'IR': 'Iran, Islamic Republic of',
+        'IQ': 'Iraq',
+        'IE': 'Ireland',
+        'IM': 'Isle of Man',
+        'IL': 'Israel',
+        'IT': 'Italy',
+        'JM': 'Jamaica',
+        'JP': 'Japan',
+        'JE': 'Jersey',
+        'JO': 'Jordan',
+        'KZ': 'Kazakhstan',
+        'KE': 'Kenya',
+        'KI': 'Kiribati',
+        'KP': 'Korea, Democratic People\'s Republic of',
+        'KR': 'Korea, Republic of',
+        'KW': 'Kuwait',
+        'KG': 'Kyrgyzstan',
+        'LA': 'Lao People\'s Democratic Republic',
+        'LV': 'Latvia',
+        'LB': 'Lebanon',
+        'LS': 'Lesotho',
+        'LR': 'Liberia',
+        'LY': 'Libya',
+        'LI': 'Liechtenstein',
+        'LT': 'Lithuania',
+        'LU': 'Luxembourg',
+        'MO': 'Macao',
+        'MK': 'Macedonia, the Former Yugoslav Republic of',
+        'MG': 'Madagascar',
+        'MW': 'Malawi',
+        'MY': 'Malaysia',
+        'MV': 'Maldives',
+        'ML': 'Mali',
+        'MT': 'Malta',
+        'MH': 'Marshall Islands',
+        'MQ': 'Martinique',
+        'MR': 'Mauritania',
+        'MU': 'Mauritius',
+        'YT': 'Mayotte',
+        'MX': 'Mexico',
+        'FM': 'Micronesia, Federated States of',
+        'MD': 'Moldova, Republic of',
+        'MC': 'Monaco',
+        'MN': 'Mongolia',
+        'ME': 'Montenegro',
+        'MS': 'Montserrat',
+        'MA': 'Morocco',
+        'MZ': 'Mozambique',
+        'MM': 'Myanmar',
+        'NA': 'Namibia',
+        'NR': 'Nauru',
+        'NP': 'Nepal',
+        'NL': 'Netherlands',
+        'NC': 'New Caledonia',
+        'NZ': 'New Zealand',
+        'NI': 'Nicaragua',
+        'NE': 'Niger',
+        'NG': 'Nigeria',
+        'NU': 'Niue',
+        'NF': 'Norfolk Island',
+        'MP': 'Northern Mariana Islands',
+        'NO': 'Norway',
+        'OM': 'Oman',
+        'PK': 'Pakistan',
+        'PW': 'Palau',
+        'PS': 'Palestine, State of',
+        'PA': 'Panama',
+        'PG': 'Papua New Guinea',
+        'PY': 'Paraguay',
+        'PE': 'Peru',
+        'PH': 'Philippines',
+        'PN': 'Pitcairn',
+        'PL': 'Poland',
+        'PT': 'Portugal',
+        'PR': 'Puerto Rico',
+        'QA': 'Qatar',
+        'RE': 'Réunion',
+        'RO': 'Romania',
+        'RU': 'Russian Federation',
+        'RW': 'Rwanda',
+        'BL': 'Saint Barthélemy',
+        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+        'KN': 'Saint Kitts and Nevis',
+        'LC': 'Saint Lucia',
+        'MF': 'Saint Martin (French part)',
+        'PM': 'Saint Pierre and Miquelon',
+        'VC': 'Saint Vincent and the Grenadines',
+        'WS': 'Samoa',
+        'SM': 'San Marino',
+        'ST': 'Sao Tome and Principe',
+        'SA': 'Saudi Arabia',
+        'SN': 'Senegal',
+        'RS': 'Serbia',
+        'SC': 'Seychelles',
+        'SL': 'Sierra Leone',
+        'SG': 'Singapore',
+        'SX': 'Sint Maarten (Dutch part)',
+        'SK': 'Slovakia',
+        'SI': 'Slovenia',
+        'SB': 'Solomon Islands',
+        'SO': 'Somalia',
+        'ZA': 'South Africa',
+        'GS': 'South Georgia and the South Sandwich Islands',
+        'SS': 'South Sudan',
+        'ES': 'Spain',
+        'LK': 'Sri Lanka',
+        'SD': 'Sudan',
+        'SR': 'Suriname',
+        'SJ': 'Svalbard and Jan Mayen',
+        'SZ': 'Swaziland',
+        'SE': 'Sweden',
+        'CH': 'Switzerland',
+        'SY': 'Syrian Arab Republic',
+        'TW': 'Taiwan, Province of China',
+        'TJ': 'Tajikistan',
+        'TZ': 'Tanzania, United Republic of',
+        'TH': 'Thailand',
+        'TL': 'Timor-Leste',
+        'TG': 'Togo',
+        'TK': 'Tokelau',
+        'TO': 'Tonga',
+        'TT': 'Trinidad and Tobago',
+        'TN': 'Tunisia',
+        'TR': 'Turkey',
+        'TM': 'Turkmenistan',
+        'TC': 'Turks and Caicos Islands',
+        'TV': 'Tuvalu',
+        'UG': 'Uganda',
+        'UA': 'Ukraine',
+        'AE': 'United Arab Emirates',
+        'GB': 'United Kingdom',
+        'US': 'United States',
+        'UM': 'United States Minor Outlying Islands',
+        'UY': 'Uruguay',
+        'UZ': 'Uzbekistan',
+        'VU': 'Vanuatu',
+        'VE': 'Venezuela, Bolivarian Republic of',
+        'VN': 'Viet Nam',
+        'VG': 'Virgin Islands, British',
+        'VI': 'Virgin Islands, U.S.',
+        'WF': 'Wallis and Futuna',
+        'EH': 'Western Sahara',
+        'YE': 'Yemen',
+        'ZM': 'Zambia',
+        'ZW': 'Zimbabwe',
+    }
+
+    @classmethod
+    def short2full(cls, code):
+        """Convert an ISO 3166-2 country code to the corresponding full name"""
+        return cls._country_map.get(code.upper())
+
+
  class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
      def __init__(self, proxies=None):
          # Set default handlers
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 34a13cb815d2c11f85f33f725d2f9bcac2a11865..3364647ed54b3a76b609fb19922310aa19929be2 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.06.15'
+__version__ = '2015.07.07'
author	Sergey M. <dstftw@gmail.com>
	Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)
committer	Sergey M. <dstftw@gmail.com>
	Fri, 17 Jul 2015 16:08:40 +0000 (22:08 +0600)
AUTHORS		patch \| blob \| history
README.md		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/compat.py		patch \| blob \| history
youtube_dl/downloader/external.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/baidu.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/clipsyndicate.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/crunchyroll.py		patch \| blob \| history
youtube_dl/extractor/ctsnews.py		patch \| blob \| history
youtube_dl/extractor/dailymotion.py		patch \| blob \| history
youtube_dl/extractor/dfb.py		patch \| blob \| history
youtube_dl/extractor/douyutv.py		patch \| blob \| history
youtube_dl/extractor/dramafever.py		patch \| blob \| history
youtube_dl/extractor/drbonanza.py		patch \| blob \| history
youtube_dl/extractor/drtuber.py		patch \| blob \| history
youtube_dl/extractor/empflix.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/gfycat.py		patch \| blob \| history
youtube_dl/extractor/gorillavid.py		patch \| blob \| history
youtube_dl/extractor/hentaistigma.py		patch \| blob \| history
youtube_dl/extractor/hostingbulk.py		patch \| blob \| history
youtube_dl/extractor/howcast.py		patch \| blob \| history
youtube_dl/extractor/ina.py		patch \| blob \| history
youtube_dl/extractor/infoq.py		patch \| blob \| history
youtube_dl/extractor/iqiyi.py		patch \| blob \| history
youtube_dl/extractor/jeuxvideo.py		patch \| blob \| history
youtube_dl/extractor/kuwo.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/letv.py		patch \| blob \| history
youtube_dl/extractor/lynda.py		patch \| blob \| history
youtube_dl/extractor/myspass.py		patch \| blob \| history
youtube_dl/extractor/myvi.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/neteasemusic.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/newstube.py		patch \| blob \| history
youtube_dl/extractor/nextmedia.py		patch \| blob \| history
youtube_dl/extractor/noco.py		patch \| blob \| history
youtube_dl/extractor/nowtv.py		patch \| blob \| history
youtube_dl/extractor/npo.py		patch \| blob \| history
youtube_dl/extractor/nrk.py		patch \| blob \| history
youtube_dl/extractor/onionstudios.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pbs.py		patch \| blob \| history
youtube_dl/extractor/planetaplay.py		patch \| blob \| history
youtube_dl/extractor/played.py		patch \| blob \| history
youtube_dl/extractor/primesharetv.py		patch \| blob \| history
youtube_dl/extractor/promptfile.py		patch \| blob \| history
youtube_dl/extractor/prosiebensat1.py		patch \| blob \| history
youtube_dl/extractor/qqmusic.py		patch \| blob \| history
youtube_dl/extractor/quickvid.py		patch \| blob \| history
youtube_dl/extractor/rds.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/rtlnl.py		patch \| blob \| history
youtube_dl/extractor/shared.py		patch \| blob \| history
youtube_dl/extractor/smotri.py		patch \| blob \| history
youtube_dl/extractor/snagfilms.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/spiegeltv.py		patch \| blob \| history
youtube_dl/extractor/thesixtyone.py		patch \| blob \| history
youtube_dl/extractor/thisamericanlife.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tnaflix.py		patch \| blob \| history
youtube_dl/extractor/twitch.py		patch \| blob \| history
youtube_dl/extractor/twitter.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/udemy.py		patch \| blob \| history
youtube_dl/extractor/udn.py		patch \| blob \| history
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/vimple.py		patch \| blob \| history
youtube_dl/extractor/vk.py		patch \| blob \| history
youtube_dl/extractor/vodlocker.py		patch \| blob \| history
youtube_dl/extractor/vube.py		patch \| blob \| history
youtube_dl/extractor/webofstories.py		patch \| blob \| history
youtube_dl/extractor/xuite.py		patch \| blob \| history
youtube_dl/extractor/yam.py		patch \| blob \| history
youtube_dl/extractor/yinyuetai.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/youku.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/options.py		patch \| blob \| history
youtube_dl/postprocessor/common.py		patch \| blob \| history
youtube_dl/postprocessor/ffmpeg.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history