Merge branch 'pinkbike' of https://github.com/misterhat/youtube-dl into misterhat...

author Sergey M․ <dstftw@gmail.com>

Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)

committer Sergey M․ <dstftw@gmail.com>

Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)
author Sergey M․ <dstftw@gmail.com>
Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)
diff --git a/AUTHORS b/AUTHORS

index ebed7ebb3a3222492111d7579ae5bdd10096cf6b..889d599a2d113b83dc505a664207d7258398fda6 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -125,3 +125,6 @@ Roman Le Négrate
  Matthias Küch
  Julian Richen
  Ping O.
+Mister Hat
+Peter Ding
+jackyzy823
diff --git a/README.md b/README.md

index e51bb534341e389a26a466f1fb4c3ef721731016..5f3a08f5a9839e2438498cc32d284b341c4fb7f8 100644 (file)
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ which means you can modify it, redistribute it or use it however you like.
      -i, --ignore-errors              Continue on download errors, for example to skip unavailable videos in a playlist
      --abort-on-error                 Abort downloading of further videos (in the playlist or the command line) if an error occurs
      --dump-user-agent                Display the current browser identification
-    --list-extractors                List all supported extractors and the URLs they would handle
+    --list-extractors                List all supported extractors
      --extractor-descriptions         Output descriptions of all supported extractors
      --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
                                       Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
@@ -168,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
      --no-progress                    Do not print progress bar
      --console-title                  Display progress in console titlebar
      -v, --verbose                    Print various debugging information
-    --dump-pages                     Print downloaded pages to debug problems (very verbose)
+    --dump-pages                     Print downloaded pages encoded using base64 to debug problems (very verbose)
      --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems
      --print-traffic                  Display sent and read HTTP traffic
      -C, --call-home                  Contact the youtube-dl server for debugging
@@ -220,10 +220,10 @@ which means you can modify it, redistribute it or use it however you like.
      --embed-thumbnail                Embed thumbnail in the audio as cover art
      --add-metadata                   Write metadata to the video file
      --metadata-from-title FORMAT     Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
-                                     parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+                                     parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
                                       %(title)s" matches a title like "Coldplay - Paradise"
      --xattrs                         Write metadata to the video file's xattrs (using dublin core and xdg standards)
-    --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
+    --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
                                       fix file if we can, warn otherwise)
      --prefer-avconv                  Prefer avconv over ffmpeg for running the postprocessors (default)
      --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the postprocessors
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index a4879bd9a1a4b5221e824927e0ef0ec4d6c9f734..220e52b988ace1314ec866f59a8ef34562f1b583 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -10,6 +10,7 @@
   - **56.com**
   - **5min**
   - **8tracks**
+ - **91porn**
   - **9gag**
   - **abc.net.au**
   - **Abc7News**
@@ -26,8 +27,7 @@
   - **anitube.se**
   - **AnySex**
   - **Aparat**
- - **AppleDailyAnimationNews**
- - **AppleDailyRealtimeNews**
+ - **AppleDaily**
   - **AppleTrailers**
   - **archive.org**: archive.org videos
   - **ARD**
@@ -120,6 +120,8 @@
   - **divxstage**: DivxStage
   - **Dotsub**
   - **DouyuTV**
+ - **dramafever**
+ - **dramafever:series**
   - **DRBonanza**
   - **Dropbox**
   - **DrTuber**
@@ -152,8 +154,8 @@
   - **fc2**
   - **fernsehkritik.tv**
   - **fernsehkritik.tv:postecke**
- - **Firedrive**
   - **Firstpost**
+ - **FiveTV**
   - **Flickr**
   - **Folketinget**: Folketinget (ft.dk; Danish parliament)
   - **FootyRoom**
@@ -218,6 +220,7 @@
   - **instagram:user**: Instagram user profile
   - **InternetVideoArchive**
   - **IPrima**
+ - **iqiyi**
   - **ivi**: ivi.ru
   - **ivi:compilation**: ivi.ru compilations
   - **Izlesene**
@@ -230,6 +233,7 @@
   - **KanalPlay**: Kanal 5/9/11 Play
   - **Kankan**
   - **Karaoketv**
+ - **KarriereVideos**
   - **keek**
   - **KeezMovies**
   - **KhanAcademy**
@@ -320,8 +324,10 @@
   - **Noco**
   - **Normalboots**
   - **NosVideo**
+ - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
   - **novamov**: NovaMov
   - **Nowness**
+ - **NowTV**
   - **nowvideo**: NowVideo
   - **npo.nl**
   - **npo.nl:live**
@@ -393,7 +399,6 @@
   - **Rte**
   - **rtl.nl**: rtl.nl and rtlxl.nl
   - **RTL2**
- - **RTLnow**
   - **RTP**
   - **RTS**: RTS.ch
   - **rtve.es:alacarta**: RTVE a la carta
@@ -406,6 +411,7 @@
   - **rutube:movie**: Rutube movies
   - **rutube:person**: Rutube person videos
   - **RUTV**: RUTV.RU
+ - **Ruutu**
   - **safari**: safaribooksonline.com online video
   - **safari:course**: safaribooksonline.com online courses
   - **Sandia**: Sandia National Laboratories
@@ -431,8 +437,9 @@
   - **smotri:community**: Smotri.com community videos
   - **smotri:user**: Smotri.com user videos
   - **Snotr**
- - **Sockshare**
   - **Sohu**
+ - **soompi**
+ - **soompi:show**
   - **soundcloud**
   - **soundcloud:playlist**
   - **soundcloud:set**
@@ -507,6 +514,7 @@
   - **Trilulilu**
   - **TruTube**
   - **Tube8**
+ - **TubiTv**
   - **Tudou**
   - **Tumblr**
   - **TuneIn**
@@ -516,6 +524,8 @@
   - **TV2**
   - **TV2Article**
   - **TV4**: tv4.se and tv4play.se
+ - **TVC**
+ - **TVCArticle**
   - **tvigle**: Интернет-телевидение Tvigle.ru
   - **tvp.pl**
   - **tvp.pl:Series**
@@ -564,6 +574,7 @@
   - **vier:videos**
   - **Viewster**
   - **viki**
+ - **viki:channel**
   - **vimeo**
   - **vimeo:album**
   - **vimeo:channel**
diff --git a/test/test_aes.py b/test/test_aes.py

index 4dc7de7b5b8d55fc1d95296b58e076aba664e1cc..315a3f5ae6a597662d05f56e97672b4ff93aff10 100644 (file)
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -39,7 +39,7 @@ class TestAES(unittest.TestCase):
          encrypted = base64.b64encode(
              intlist_to_bytes(self.iv[:8]) +
              b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
-        )
+        ).decode('utf-8')
          decrypted = (aes_decrypt_text(encrypted, password, 16))
          self.assertEqual(decrypted, self.secret_msg)
  
@@ -47,7 +47,7 @@ class TestAES(unittest.TestCase):
          encrypted = base64.b64encode(
              intlist_to_bytes(self.iv[:8]) +
              b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
-        )
+        ).decode('utf-8')
          decrypted = (aes_decrypt_text(encrypted, password, 32))
          self.assertEqual(decrypted, self.secret_msg)
  
diff --git a/test/test_subtitles.py b/test/test_subtitles.py

index 891ee620b1f2627dd6991e0cccfbc58b59fb6a95..c4e3adb67b7d1034b36cdd3c45969fe321351c64 100644 (file)
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
          self.DL.params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertEqual(set(subtitles.keys()), set(['no']))
-        self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+        self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
  
  
  class TestRaiSubtitles(BaseTestSubtitles):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index d1953c18f39b438740aec88a1aadf4d529a8e0b4..6e4b6f56664f67a796b21f10b8d005f2f6e5b68d 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -49,6 +49,7 @@ from .utils import (
      ExtractorError,
      format_bytes,
      formatSeconds,
+    HEADRequest,
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
@@ -118,7 +119,7 @@ class YoutubeDL(object):
  
      username:          Username for authentication purposes.
      password:          Password for authentication purposes.
-    videopassword:     Password for acces a video.
+    videopassword:     Password for accessing a video.
      usenetrc:          Use netrc for authentication instead.
      verbose:           Print additional info to stdout.
      quiet:             Do not print messages to stdout.
@@ -923,8 +924,9 @@ class YoutubeDL(object):
                  if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
              if audiovideo_formats:
                  return audiovideo_formats[format_idx]
-            # for audio only urls, select the best/worst audio format
-            elif all(f.get('acodec') != 'none' for f in available_formats):
+            # for audio only (soundcloud) or video only (imgur) urls, select the best/worst available format
+            elif (all(f.get('acodec') != 'none' for f in available_formats) or
+                  all(f.get('vcodec') != 'none' for f in available_formats)):
                  return available_formats[format_idx]
          elif format_spec == 'bestaudio':
              audio_formats = [
@@ -1014,13 +1016,13 @@ class YoutubeDL(object):
              info_dict['display_id'] = info_dict['id']
  
          if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around negative timestamps in Windows
-            # (see http://bugs.python.org/issue1646728)
-            if info_dict['timestamp'] < 0 and os.name == 'nt':
-                info_dict['timestamp'] = 0
-            upload_date = datetime.datetime.utcfromtimestamp(
-                info_dict['timestamp'])
-            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+            # see http://bugs.python.org/issue1646728)
+            try:
+                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+            except (ValueError, OverflowError, OSError):
+                pass
  
          if self.params.get('listsubtitles', False):
              if 'automatic_captions' in info_dict:
@@ -1031,12 +1033,6 @@ class YoutubeDL(object):
              info_dict['id'], info_dict.get('subtitles'),
              info_dict.get('automatic_captions'))
  
-        # This extractors handle format selection themselves
-        if info_dict['extractor'] in ['Youku']:
-            if download:
-                self.process_info(info_dict)
-            return info_dict
-
          # We now pick which formats have to be downloaded
          if info_dict.get('formats') is None:
              # There's only one format available
@@ -1047,6 +1043,8 @@ class YoutubeDL(object):
          if not formats:
              raise ExtractorError('No video formats found!')
  
+        formats_dict = {}
+
          # We check that all the formats have the format and format_id fields
          for i, format in enumerate(formats):
              if 'url' not in format:
@@ -1054,6 +1052,18 @@ class YoutubeDL(object):
  
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
+            format_id = format['format_id']
+            if format_id not in formats_dict:
+                formats_dict[format_id] = []
+            formats_dict[format_id].append(format)
+
+        # Make sure all formats have unique format_id
+        for format_id, ambiguous_formats in formats_dict.items():
+            if len(ambiguous_formats) > 1:
+                for i, format in enumerate(ambiguous_formats):
+                    format['format_id'] = '%s-%d' % (format_id, i)
+
+        for i, format in enumerate(formats):
              if format.get('format') is None:
                  format['format'] = '{id} - {res}{note}'.format(
                      id=format['format_id'],
@@ -1706,7 +1716,8 @@ class YoutubeDL(object):
              if req_is_string:
                  req = url_escaped
              else:
-                req = compat_urllib_request.Request(
+                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+                req = req_type(
                      url_escaped, data=req.data, headers=req.headers,
                      origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
  
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 80bec39da15c29d8e45688d8cc9c16493f02a7a3..6c548d8e9fd455d04ec1b5c5336eb4e7dfb75039 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,6 +112,10 @@ from .dfb import DFBIE
  from .dhm import DHMIE
  from .dotsub import DotsubIE
  from .douyutv import DouyuTVIE
+from .dramafever import (
+    DramaFeverIE,
+    DramaFeverSeriesIE,
+)
  from .dreisat import DreiSatIE
  from .drbonanza import DRBonanzaIE
  from .drtuber import DrTuberIE
@@ -149,10 +153,10 @@ from .extremetube import ExtremeTubeIE
  from .facebook import FacebookIE
  from .faz import FazIE
  from .fc2 import FC2IE
-from .firedrive import FiredriveIE
  from .firstpost import FirstpostIE
  from .firsttv import FirstTVIE
  from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
  from .fktv import (
      FKTVIE,
      FKTVPosteckeIE,
@@ -230,6 +234,7 @@ from .infoq import InfoQIE
  from .instagram import InstagramIE, InstagramUserIE
  from .internetvideoarchive import InternetVideoArchiveIE
  from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
  from .ivi import (
      IviIE,
      IviCompilationIE
@@ -353,8 +358,10 @@ from .ninegag import NineGagIE
  from .noco import NocoIE
  from .normalboots import NormalbootsIE
  from .nosvideo import NosVideoIE
+from .nova import NovaIE
  from .novamov import NovaMovIE
  from .nowness import NownessIE
+from .nowtv import NowTVIE
  from .nowvideo import NowVideoIE
  from .npo import (
      NPOIE,
@@ -402,6 +409,7 @@ from .playfm import PlayFMIE
  from .playvid import PlayvidIE
  from .playwire import PlaywireIE
  from .podomatic import PodomaticIE
+from .porn91 import Porn91IE
  from .pornhd import PornHdIE
  from .pornhub import (
      PornHubIE,
@@ -439,7 +447,6 @@ from .roxwel import RoxwelIE
  from .rtbf import RTBFIE
  from .rte import RteIE
  from .rtlnl import RtlNlIE
-from .rtlnow import RTLnowIE
  from .rtl2 import RTL2IE
  from .rtp import RTPIE
  from .rts import RTSIE
@@ -453,6 +460,7 @@ from .rutube import (
      RutubePersonIE,
  )
  from .rutv import RUTVIE
+from .ruutu import RuutuIE
  from .sandia import SandiaIE
  from .safari import (
      SafariIE,
@@ -481,8 +489,11 @@ from .smotri import (
      SmotriBroadcastIE,
  )
  from .snotr import SnotrIE
-from .sockshare import SockshareIE
  from .sohu import SohuIE
+from .soompi import (
+    SoompiIE,
+    SoompiShowIE,
+)
  from .soundcloud import (
      SoundcloudIE,
      SoundcloudSetIE,
@@ -568,6 +579,7 @@ from .traileraddict import TrailerAddictIE
  from .trilulilu import TriluliluIE
  from .trutube import TruTubeIE
  from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
  from .tunein import TuneInIE
@@ -578,6 +590,10 @@ from .tv2 import (
      TV2ArticleIE,
  )
  from .tv4 import TV4IE
+from .tvc import (
+    TVCIE,
+    TVCArticleIE,
+)
  from .tvigle import TvigleIE
  from .tvp import TvpIE, TvpSeriesIE
  from .tvplay import TVPlayIE
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py

index a117502bc0ad7bfec11592ec57da575898cacc3d..e0518cf261fbffc4dd23bc4a3800d04eae324139 100644 (file)
--- a/youtube_dl/extractor/aftonbladet.py
+++ b/youtube_dl/extractor/aftonbladet.py
@@ -6,11 +6,11 @@ from ..utils import int_or_none
  
  
  class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
+    _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
      _TEST = {
-        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
+        'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
          'info_dict': {
-            'id': 'article36015',
+            'id': '36015',
              'ext': 'mp4',
              'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
              'description': 'Jupiters måne mest aktiv av alla himlakroppar',
@@ -25,8 +25,9 @@ class AftonbladetIE(InfoExtractor):
  
          # find internal video meta data
          meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
-        internal_meta_id = self._html_search_regex(
-            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
+        player_config = self._parse_json(self._html_search_regex(
+            r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
+        internal_meta_id = player_config['videoId']
          internal_meta_url = meta_url % internal_meta_id
          internal_meta_json = self._download_json(
              internal_meta_url, video_id, 'Downloading video meta data')
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py

index 249bc6bbde85dc568796f094f421f989df664a1c..0305f88b53b4e3790cd6ff66ee6134741357fff5 100644 (file)
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -129,6 +129,20 @@ class BBCCoUkIE(InfoExtractor):
                  'skip_download': True,
              },
              'skip': 'geolocation',
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
+            'info_dict': {
+                'id': 'b05zmgw1',
+                'ext': 'flv',
+                'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
+                'title': 'Royal Academy Summer Exhibition',
+                'duration': 3540,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'geolocation',
          }, {
              'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
              'only_matching': True,
@@ -267,7 +281,7 @@ class BBCCoUkIE(InfoExtractor):
                  programme_id, 'Downloading media selection XML')
          except ExtractorError as ee:
              if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
+                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
              else:
                  raise
  
@@ -362,7 +376,7 @@ class BBCCoUkIE(InfoExtractor):
              formats, subtitles = self._download_media_selector(programme_id)
              title = self._og_search_title(webpage)
              description = self._search_regex(
-                r'<p class="medium-description">([^<]+)</p>',
+                r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
                  webpage, 'description', fatal=False)
          else:
              programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py

index 7ca835e31f3477f8e46c74804aea36ecfe789686..bf60450c2f3753aa64d5ae11069bec98b67f23dc 100644 (file)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
  
  import re
  import itertools
+import json
+import xml.etree.ElementTree as ET
  
  from .common import InfoExtractor
  from ..utils import (
@@ -67,11 +69,19 @@ class BiliBiliIE(InfoExtractor):
  
          entries = []
  
-        lq_doc = self._download_xml(
+        lq_page = self._download_webpage(
              'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
              video_id,
              note='Downloading LQ video info'
          )
+        try:
+            err_info = json.loads(lq_page)
+            raise ExtractorError(
+                'BiliBili said: ' + err_info['error_text'], expected=True)
+        except ValueError:
+            pass
+
+        lq_doc = ET.fromstring(lq_page)
          lq_durls = lq_doc.findall('./durl')
  
          hq_doc = self._download_xml(
@@ -80,9 +90,11 @@ class BiliBiliIE(InfoExtractor):
              note='Downloading HQ video info',
              fatal=False,
          )
-        hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
-
-        assert len(lq_durls) == len(hq_durls)
+        if hq_doc is not False:
+            hq_durls = hq_doc.findall('./durl')
+            assert len(lq_durls) == len(hq_durls)
+        else:
+            hq_durls = itertools.repeat(None)
  
          i = 1
          for lq_durl, hq_durl in zip(lq_durls, hq_durls):
@@ -93,7 +105,7 @@ class BiliBiliIE(InfoExtractor):
                  'filesize': int_or_none(
                      lq_durl.find('./size'), get_attr='text'),
              }]
-            if hq_durl:
+            if hq_durl is not None:
                  formats.append({
                      'format_id': 'hq',
                      'quality': 2,
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 4f60d53660fa7777b9e1b6152967ce2e7e567ec9..d768f99e67bd49de31e3f0df70d832284cbb3482 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -156,6 +156,28 @@ class BrightcoveIE(InfoExtractor):
          linkBase = find_param('linkBaseURL')
          if linkBase is not None:
              params['linkBaseURL'] = linkBase
+        return cls._make_brightcove_url(params)
+
+    @classmethod
+    def _build_brighcove_url_from_js(cls, object_js):
+        # Build the Brightcove URL from a customBC.createVideo(...) JS call (None if no match). JS layout:
+        # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+        #   // build Brightcove <object /> XML
+        # }
+        m = re.search(
+            r'''(?x)customBC\.createVideo\(                          # literal call prefix (dot escaped)
+                .*?                                                  # skipping width and height
+                ["\'](?P<playerID>\d+)["\']\s*,\s*                   # playerID
+                ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s*  # playerKey begins with AQ and is 50 characters
+                                                                     # in length, however it's appended to itself
+                                                                     # in places, so truncate
+                ["\'](?P<videoID>\d+)["\']                           # @videoPlayer
+            ''', object_js)
+        if m:
+            return cls._make_brightcove_url(m.groupdict())
+
+    @classmethod
+    def _make_brightcove_url(cls, params):
          data = compat_urllib_parse.urlencode(params)
          return cls._FEDERATED_URL_TEMPLATE % data
  
@@ -172,7 +194,7 @@ class BrightcoveIE(InfoExtractor):
          """Return a list of all Brightcove URLs from the webpage """
  
          url_m = re.search(
-            r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
+            r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
              webpage)
          if url_m:
              url = unescapeHTML(url_m.group(1))
@@ -188,7 +210,12 @@ class BrightcoveIE(InfoExtractor):
                  [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
              ).+?>\s*</object>''',
              webpage)
-        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+        if matches:
+            return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+
+        return list(filter(None, [
+            cls._build_brighcove_url_from_js(custom_bc)
+            for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
  
      def _real_extract(self, url):
          url, smuggled_data = unsmuggle_url(url, {})
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py

index 1ceb9d8d9df6c0268e33de5e34c01a245e134e05..75fffb1563ae9f95bf862ad156111b6962a8429e 100644 (file)
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -4,12 +4,13 @@ from .common import InfoExtractor
  
  
  class CBSIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
+    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
  
      _TESTS = [{
          'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
          'info_dict': {
              'id': '4JUVEwq3wUT7',
+            'display_id': 'connect-chat-feat-garth-brooks',
              'ext': 'flv',
              'title': 'Connect Chat feat. Garth Brooks',
              'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
@@ -24,6 +25,7 @@ class CBSIE(InfoExtractor):
          'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
          'info_dict': {
              'id': 'WWF_5KqY3PK1',
+            'display_id': 'st-vincent',
              'ext': 'flv',
              'title': 'Live on Letterman - St. Vincent',
              'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
@@ -34,12 +36,23 @@ class CBSIE(InfoExtractor):
              'skip_download': True,
          },
          '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
          real_id = self._search_regex(
-            r"video\.settings\.pid\s*=\s*'([^']+)';",
+            [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
              webpage, 'real video ID')
-        return self.url_result('theplatform:%s' % real_id)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': 'theplatform:%s' % real_id,
+            'display_id': display_id,
+        }
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py

index cf0a7551b7f3df672d20aad7a00d7ced43bf2945..c949a481477c187d9433f39e6e851b87c97edf79 100644 (file)
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor):
                  'uploader_id': 'Cinemassacre',
                  'title': 'AVGN: McKids',
              }
+        },
+        {
+            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
+            'md5': '1376908e49572389e7b06251a53cdd08',
+            'info_dict': {
+                'id': 'Cinemassacre-555779690c440',
+                'ext': 'mp4',
+                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
+                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
+                'upload_date': '20150525',
+            }
          }
      ]
  
@@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor):
  
          playerdata_url = self._search_regex(
              [
-                r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+                r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
                  r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
              ],
              webpage, 'player data URL', default=None)
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py

index 3145b30514ea2a075f92077b9f87b64c9e8820a7..5dd69bff7ac73bcc0adc4d91c614045ddf116a9c 100644 (file)
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -11,7 +11,7 @@ from ..utils import (
  
  class CNETIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
          'info_dict': {
              'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
@@ -25,7 +25,20 @@ class CNETIE(InfoExtractor):
          'params': {
              'skip_download': 'requires rtmpdump',
          }
-    }
+    }, {
+        'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
+        'info_dict': {
+            'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
+            'ext': 'flv',
+            'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
+            'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
+            'uploader': 'Ashley Esqueda',
+            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
+        },
+        'params': {
+            'skip_download': True,  # requires rtmpdump
+        },
+    }]
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
@@ -42,7 +55,7 @@ class CNETIE(InfoExtractor):
              raise ExtractorError('Cannot find video data')
  
          mpx_account = data['config']['players']['default']['mpx_account']
-        vid = vdata['files']['rtmp']
+        vid = vdata['files'].get('rtmp', vdata['files']['hds'])
          tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
  
          video_id = vdata['id']
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index cecf917ffb67040739bbacf573297ba891ec9ea1..49e4dc7109e151ae124ed1aac15a9762d00eac21 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -846,7 +846,7 @@ class InfoExtractor(object):
  
      def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                                entry_protocol='m3u8', preference=None,
-                              m3u8_id=None):
+                              m3u8_id=None, note=None, errnote=None):
  
          formats = [{
              'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -865,8 +865,8 @@ class InfoExtractor(object):
  
          m3u8_doc = self._download_webpage(
              m3u8_url, video_id,
-            note='Downloading m3u8 information',
-            errnote='Failed to download m3u8 information')
+            note=note or 'Downloading m3u8 information',
+            errnote=errnote or 'Failed to download m3u8 information')
          last_info = None
          last_media = None
          kv_rex = re.compile(
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py

index 1c77df47ef346173fc11a58396c98768e5afc986..41f0c736d98c229518bacb41fac2f35ce9b80958 100644 (file)
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor):
          self._login()
  
      def _decrypt_subtitles(self, data, iv, id):
-        data = bytes_to_intlist(data)
-        iv = bytes_to_intlist(iv)
+        data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
+        iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
          id = int(id)
  
          def obfuscate_key_aux(count, modulo, start):
@@ -179,6 +179,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
  
          return output
  
+    def _extract_subtitles(self, subtitle):
+        sub_root = xml.etree.ElementTree.fromstring(subtitle)
+        return [{
+            'ext': 'srt',
+            'data': self._convert_subtitles_to_srt(sub_root),
+        }, {
+            'ext': 'ass',
+            'data': self._convert_subtitles_to_ass(sub_root),
+        }]
+
      def _get_subtitles(self, video_id, webpage):
          subtitles = {}
          for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
@@ -190,25 +200,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
              data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
              if not id or not iv or not data:
                  continue
-            id = int(id)
-            iv = base64.b64decode(iv)
-            data = base64.b64decode(data)
-
              subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
              lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
              if not lang_code:
                  continue
-            sub_root = xml.etree.ElementTree.fromstring(subtitle)
-            subtitles[lang_code] = [
-                {
-                    'ext': 'srt',
-                    'data': self._convert_subtitles_to_srt(sub_root),
-                },
-                {
-                    'ext': 'ass',
-                    'data': self._convert_subtitles_to_ass(sub_root),
-                },
-            ]
+            subtitles[lang_code] = self._extract_subtitles(subtitle)
          return subtitles
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py

index db10b8d00b7482b157bd9dd0ecc9ef9b8191ce88..70aa4333c773c39c4e3be451f8b632045ac85b51 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
  
  class DailymotionUserIE(DailymotionPlaylistIE):
      IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'
      _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
      _TESTS = [{
          'url': 'https://www.dailymotion.com/user/nqtv',
@@ -239,7 +239,8 @@ class DailymotionUserIE(DailymotionPlaylistIE):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          user = mobj.group('user')
-        webpage = self._download_webpage(url, user)
+        webpage = self._download_webpage(
+            'https://www.dailymotion.com/user/%s' % user, user)
          full_user = unescapeHTML(self._html_search_regex(
              r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
              webpage, 'user'))
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py

index d3e6675283cddcb8f6a6dfffbfbd1e1ea3da11bc..d6723ecf26ea67356b288df6e5f3bf612141b91a 100644 (file)
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -2,19 +2,19 @@ from __future__ import unicode_literals
  
  from .common import InfoExtractor
  from ..utils import (
+    parse_duration,
      parse_iso8601,
-    int_or_none,
  )
+from ..compat import compat_str
  
  
  class DiscoveryIE(InfoExtractor):
      _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
          'info_dict': {
-            'id': 'mission-impossible-outtakes',
-            'ext': 'flv',
+            'id': '20769',
+            'ext': 'mp4',
              'title': 'Mission Impossible Outtakes',
              'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                              ' each other -- to the point of confusing Jamie\'s dog -- and '
@@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor):
              'timestamp': 1303099200,
              'upload_date': '20110418',
          },
-    }
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        }
+    }, {
+        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
+        'info_dict': {
+            'id': 'mythbusters-the-simpsons',
+            'title': 'MythBusters: The Simpsons',
+        },
+        'playlist_count': 9,
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        info = self._download_json(url + '?flat=1', video_id)
  
-        info = self._parse_json(self._search_regex(
-            r'(?s)<script type="application/ld\+json">(.*?)</script>',
-            webpage, 'video info'), video_id)
+        video_title = info.get('playlist_title') or info.get('video_title')
  
-        return {
-            'id': video_id,
-            'title': info['name'],
-            'url': info['contentURL'],
-            'description': info.get('description'),
-            'thumbnail': info.get('thumbnailUrl'),
-            'timestamp': parse_iso8601(info.get('uploadDate')),
-            'duration': int_or_none(info.get('duration')),
-        }
+        entries = [{
+            'id': compat_str(video_info['id']),
+            'formats': self._extract_m3u8_formats(
+                video_info['src'], video_id, ext='mp4',
+                note='Download m3u8 information for video %d' % (idx + 1)),
+            'title': video_info['title'],
+            'description': video_info.get('description'),
+            'duration': parse_duration(video_info.get('video_length')),
+            'webpage_url': video_info.get('href'),
+            'thumbnail': video_info.get('thumbnailURL'),
+            'alt_title': video_info.get('secondary_title'),
+            'timestamp': parse_iso8601(video_info.get('publishedDate')),
+        } for idx, video_info in enumerate(info['playlist'])]
+
+        return self.playlist_result(entries, video_id, video_title)
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py

new file mode 100644 (file)

index 0000000..ca41a3a
--- /dev/null
+++ b/youtube_dl/extractor/dramafever.py
@@ -0,0 +1,197 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class DramaFeverBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
+    _NETRC_MACHINE = 'dramafever'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'username': username,
+            'password': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if all(logout_pattern not in response
+               for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
+            error = self._html_search_regex(
+                r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+                response, 'error message', default=None)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+
+class DramaFeverIE(DramaFeverBaseIE):
+    IE_NAME = 'dramafever'
+    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
+    _TEST = {
+        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
+        'info_dict': {
+            'id': '4512.1',
+            'ext': 'flv',
+            'title': 'Cooking with Shin 4512.1',
+            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1404336058,
+            'upload_date': '20140702',
+            'duration': 343,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url).replace('/', '.')
+
+        try:
+            feed = self._download_json(
+                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
+                video_id, 'Downloading episode JSON')['channel']['item']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                raise ExtractorError(
+                    'Currently unavailable in your country.', expected=True)
+            raise
+
+        media_group = feed.get('media-group', {})
+
+        formats = []
+        for media_content in media_group['media-content']:
+            src = media_content.get('@attributes', {}).get('url')
+            if not src:
+                continue
+            ext = determine_ext(src)
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    src, video_id, f4m_id='hds'))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', m3u8_id='hls'))
+            else:
+                formats.append({
+                    'url': src,
+                })
+        self._sort_formats(formats)
+
+        title = media_group.get('media-title')
+        description = media_group.get('media-description')
+        duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
+        thumbnail = self._proto_relative_url(
+            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
+        timestamp = parse_iso8601(feed.get('pubDate'), ' ')
+
+        subtitles = {}
+        for media_subtitle in media_group.get('media-subTitle', []):
+            lang = media_subtitle.get('@attributes', {}).get('lang')
+            href = media_subtitle.get('@attributes', {}).get('href')
+            if not lang or not href:
+                continue
+            subtitles[lang] = [{
+                'ext': 'ttml',
+                'url': href,
+            }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class DramaFeverSeriesIE(DramaFeverBaseIE):
+    IE_NAME = 'dramafever:series'
+    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
+    _TESTS = [{
+        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
+        'info_dict': {
+            'id': '4512',
+            'title': 'Cooking with Shin',
+            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'http://www.dramafever.com/drama/124/IRIS/',
+        'info_dict': {
+            'id': '124',
+            'title': 'IRIS',
+            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
+        },
+        'playlist_count': 20,
+    }]
+
+    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
+    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
+
+    def _get_consumer_secret(self, video_id):
+        mainjs = self._download_webpage(
+            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
+            video_id, 'Downloading main.js', fatal=False)
+        if not mainjs:
+            return self._CONSUMER_SECRET
+        return self._search_regex(
+            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
+            'consumer secret', default=self._CONSUMER_SECRET)
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+
+        consumer_secret = self._get_consumer_secret(series_id)
+
+        series = self._download_json(
+            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
+            % (consumer_secret, series_id),
+            series_id, 'Downloading series JSON')['series'][series_id]
+
+        title = clean_html(series['name'])
+        description = clean_html(series.get('description') or series.get('description_short'))
+
+        entries = []
+        for page_num in itertools.count(1):
+            episodes = self._download_json(
+                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
+                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
+                series_id, 'Downloading episodes JSON page #%d' % page_num)
+            for episode in episodes.get('value', []):
+                episode_url = episode.get('episode_url')
+                if not episode_url:
+                    continue
+                entries.append(self.url_result(
+                    compat_urlparse.urljoin(url, episode_url),
+                    'DramaFever', episode.get('guid')))
+            if page_num == episodes['num_pages']:
+                break
+
+        return self.playlist_result(entries, series_id, title, description)
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py

index 9a5a8f4bb44039e6c52968801033a3d12a73d835..4827022e088cf33d064acf7ab8cebdadc0d743a0 100644 (file)
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -26,6 +26,6 @@ class EMPFlixIE(TNAFlixIE):
          },
          {
              'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
-            'matching_only': True,
+            'only_matching': True,
          }
      ]
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py

index 937b28fcccf3bd58929adcca1bda9d05966460e5..82dc27bc6ff3ed2edd3b318f3ed3d14e360ef22d 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor):
              'id': '274175099429670',
              'ext': 'mp4',
              'title': 'Facebook video #274175099429670',
-        }
+        },
+        'expected_warnings': [
+            'title'
+        ]
      }, {
          'url': 'https://www.facebook.com/video.php?v=10204634152394104',
          'only_matching': True,
@@ -149,12 +152,12 @@ class FacebookIE(InfoExtractor):
              raise ExtractorError('Cannot find video formats')
  
          video_title = self._html_search_regex(
-            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
-            fatal=False)
+            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
+            default=None)
          if not video_title:
              video_title = self._html_search_regex(
                  r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
-                webpage, 'alternative title', default=None)
+                webpage, 'alternative title', fatal=False)
              video_title = limit_length(video_title, 80)
          if not video_title:
              video_title = 'Facebook video #%s' % video_id
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py

deleted file mode 100644 (file)

index 3191116..0000000
--- a/youtube_dl/extractor/firedrive.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import (
-    ExtractorError,
-)
-
-
-class FiredriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
-                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
-    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'
-
-    _TESTS = [{
-        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
-        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
-        'info_dict': {
-            'id': 'FEB892FA160EBD01',
-            'ext': 'flv',
-            'title': 'bbb_theora_486kbit.flv',
-            'thumbnail': 're:^http://.*\.jpg$',
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url = 'http://firedrive.com/file/%s' % video_id
-        webpage = self._download_webpage(url, video_id)
-
-        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id,
-                                 expected=True)
-
-        fields = dict(re.findall(r'''(?x)<input\s+
-            type="hidden"\s+
-            name="([^"]+)"\s+
-            value="([^"]*)"
-            ''', webpage))
-
-        post = compat_urllib_parse.urlencode(fields)
-        req = compat_urllib_request.Request(url, post)
-        req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
-        # Apparently, this header is required for confirmation to work.
-        req.add_header('Host', 'www.firedrive.com')
-
-        webpage = self._download_webpage(req, video_id,
-                                         'Downloading video page')
-
-        title = self._search_regex(r'class="external_title_left">(.+)</div>',
-                                   webpage, 'title')
-        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
-                                       'thumbnail', fatal=False)
-        if thumbnail is not None:
-            thumbnail = 'http:' + thumbnail
-
-        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
-                                 webpage, 'extension', fatal=False)
-        video_url = self._search_regex(
-            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
-
-        formats = [{
-            'format_id': 'sd',
-            'url': video_url,
-            'ext': ext,
-        }]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py

new file mode 100644 (file)

index 0000000..13fbc4d
--- /dev/null
+++ b/youtube_dl/extractor/fivetv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class FiveTVIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    http://
+                        (?:www\.)?5-tv\.ru/
+                        (?:
+                            (?:[^/]+/)+(?P<id>\d+)|
+                            (?P<path>[^/?#]+)(?:[/?#])?
+                        )
+                    '''
+
+    _TESTS = [{
+        'url': 'http://5-tv.ru/news/96814/',
+        'md5': 'bbff554ad415ecf5416a2f48c22d9283',
+        'info_dict': {
+            'id': '96814',
+            'ext': 'mp4',
+            'title': 'Россияне выбрали имя для общенациональной платежной системы',
+            'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 180,
+        },
+    }, {
+        'url': 'http://5-tv.ru/video/1021729/',
+        'info_dict': {
+            'id': '1021729',
+            'ext': 'mp4',
+            'title': '3D принтер',
+            'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 180,
+        },
+    }, {
+        'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
+        'info_dict': {
+            'id': 'glavnoe',
+            'ext': 'mp4',
+            'title': 'Итоги недели с 8 по 14 июня 2015 года',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
+        'only_matching': True,
+    }, {
+        'url': 'http://5-tv.ru/films/1507502/',
+        'only_matching': True,
+    }, {
+        'url': 'http://5-tv.ru/programs/broadcast/508713/',
+        'only_matching': True,
+    }, {
+        'url': 'http://5-tv.ru/angel/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('path')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(
+            r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
+            webpage, 'video url')
+
+        title = self._og_search_title(webpage, default=None) or self._search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        duration = int_or_none(self._og_search_property(
+            'video:duration', webpage, 'duration', default=None))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'duration': duration,
+        }
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py

index edf555b2987520618b70bf8bd423c5fc1f60e5a9..db0bbec1ec1338e3aeaaa26af19cc0612372995f 100644 (file)
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -60,7 +60,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                      continue
                  video_url_parsed = compat_urllib_parse_urlparse(video_url)
                  f4m_url = self._download_webpage(
-                    'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
+                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path,
                      video_id, 'Downloading f4m manifest token', fatal=False)
                  if f4m_url:
                      formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 9a7b0d25d790054e39729bab63e42b1ea7a89dff..f6b984300d1e09e9f55a9d618e3966d7778602e9 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -9,6 +9,8 @@ from .common import InfoExtractor
  from .youtube import YoutubeIE
  from ..compat import (
      compat_urllib_parse,
+    compat_urllib_parse_unquote,
+    compat_urllib_request,
      compat_urlparse,
      compat_xml_parse_error,
  )
@@ -32,6 +34,7 @@ from .brightcove import BrightcoveIE
  from .nbc import NBCSportsVPlayerIE
  from .ooyala import OoyalaIE
  from .rutv import RUTVIE
+from .tvc import TVCIE
  from .sportbox import SportBoxEmbedIE
  from .smotri import SmotriIE
  from .condenast import CondeNastIE
@@ -39,6 +42,7 @@ from .udn import UDNEmbedIE
  from .senateisvp import SenateISVPIE
  from .bliptv import BlipTVIE
  from .svt import SVTIE
+from .pornhub import PornHubIE
  
  
  class GenericIE(InfoExtractor):
@@ -46,6 +50,97 @@ class GenericIE(InfoExtractor):
      _VALID_URL = r'.*'
      IE_NAME = 'generic'
      _TESTS = [
+        # Direct link to a video
+        {
+            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+            'info_dict': {
+                'id': 'trailer',
+                'ext': 'mp4',
+                'title': 'trailer',
+                'upload_date': '20100513',
+            }
+        },
+        # Direct link to media delivered compressed (until Accept-Encoding is *)
+        {
+            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+            'md5': '128c42e68b13950268b648275386fc74',
+            'info_dict': {
+                'id': 'FictionJunction-Parallel_Hearts',
+                'ext': 'flac',
+                'title': 'FictionJunction-Parallel_Hearts',
+                'upload_date': '20140522',
+            },
+            'expected_warnings': [
+                'URL could be a direct video link, returning it as such.'
+            ]
+        },
+        # Direct download with broken HEAD
+        {
+            'url': 'http://ai-radio.org:8000/radio.opus',
+            'info_dict': {
+                'id': 'radio',
+                'ext': 'opus',
+                'title': 'radio',
+            },
+            'params': {
+                'skip_download': True,  # infinite live stream
+            },
+            'expected_warnings': [
+                r'501.*Not Implemented'
+            ],
+        },
+        # Direct link with incorrect MIME type
+        {
+            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+            'md5': '4ccbebe5f36706d85221f204d7eb5913',
+            'info_dict': {
+                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+                'id': '5_Lennart_Poettering_-_Systemd',
+                'ext': 'webm',
+                'title': '5_Lennart_Poettering_-_Systemd',
+                'upload_date': '20141120',
+            },
+            'expected_warnings': [
+                'URL could be a direct video link, returning it as such.'
+            ]
+        },
+        # RSS feed
+        {
+            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+            'info_dict': {
+                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+                'title': 'Zero Punctuation',
+                'description': 're:.*groundbreaking video review series.*'
+            },
+            'playlist_mincount': 11,
+        },
+        # RSS feed with enclosure
+        {
+            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+            'info_dict': {
+                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+                'ext': 'm4v',
+                'upload_date': '20150228',
+                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+            }
+        },
+        # google redirect
+        {
+            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+            'info_dict': {
+                'id': 'cmQHVoWB5FY',
+                'ext': 'mp4',
+                'upload_date': '20130224',
+                'uploader_id': 'TheVerge',
+                'description': 're:^Chris Ziegler takes a look at the\.*',
+                'uploader': 'The Verge',
+                'title': 'First Firefox OS phones side-by-side',
+            },
+            'params': {
+                'skip_download': False,
+            }
+        },
          {
              'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@ -125,17 +220,6 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,  # m3u8 download
              },
          },
-        # Direct link to a video
-        {
-            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
-            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
-            'info_dict': {
-                'id': 'trailer',
-                'ext': 'mp4',
-                'title': 'trailer',
-                'upload_date': '20100513',
-            }
-        },
          # ooyala video
          {
              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -160,22 +244,6 @@ class GenericIE(InfoExtractor):
              },
              'add_ie': ['Ooyala'],
          },
-        # google redirect
-        {
-            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
-            'info_dict': {
-                'id': 'cmQHVoWB5FY',
-                'ext': 'mp4',
-                'upload_date': '20130224',
-                'uploader_id': 'TheVerge',
-                'description': 're:^Chris Ziegler takes a look at the\.*',
-                'uploader': 'The Verge',
-                'title': 'First Firefox OS phones side-by-side',
-            },
-            'params': {
-                'skip_download': False,
-            }
-        },
          # embed.ly video
          {
              'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -225,6 +293,15 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        # TVC embed
+        {
+            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+            'info_dict': {
+                'id': '55304',
+                'ext': 'mp4',
+                'title': 'Дошкольное воспитание',
+            },
+        },
          # SportBox embed
          {
              'url': 'http://www.vestifinance.ru/articles/25753',
@@ -407,16 +484,6 @@ class GenericIE(InfoExtractor):
                  'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
              }
          },
-        # RSS feed
-        {
-            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
-            'info_dict': {
-                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
-                'title': 'Zero Punctuation',
-                'description': 're:.*groundbreaking video review series.*'
-            },
-            'playlist_mincount': 11,
-        },
          # Multiple brightcove videos
          # https://github.com/rg3/youtube-dl/issues/2283
          {
@@ -470,21 +537,6 @@ class GenericIE(InfoExtractor):
                  'uploader': 'thoughtworks.wistia.com',
              },
          },
-        # Direct download with broken HEAD
-        {
-            'url': 'http://ai-radio.org:8000/radio.opus',
-            'info_dict': {
-                'id': 'radio',
-                'ext': 'opus',
-                'title': 'radio',
-            },
-            'params': {
-                'skip_download': True,  # infinite live stream
-            },
-            'expected_warnings': [
-                r'501.*Not Implemented'
-            ],
-        },
          # Soundcloud embed
          {
              'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
@@ -516,21 +568,6 @@ class GenericIE(InfoExtractor):
              },
              'playlist_mincount': 2,
          },
-        # Direct link with incorrect MIME type
-        {
-            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
-            'md5': '4ccbebe5f36706d85221f204d7eb5913',
-            'info_dict': {
-                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
-                'id': '5_Lennart_Poettering_-_Systemd',
-                'ext': 'webm',
-                'title': '5_Lennart_Poettering_-_Systemd',
-                'upload_date': '20141120',
-            },
-            'expected_warnings': [
-                'URL could be a direct video link, returning it as such.'
-            ]
-        },
          # Cinchcast embed
          {
              'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
@@ -689,16 +726,6 @@ class GenericIE(InfoExtractor):
                  'age_limit': 0,
              },
          },
-        # RSS feed with enclosure
-        {
-            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
-            'info_dict': {
-                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
-                'ext': 'm4v',
-                'upload_date': '20150228',
-                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
-            }
-        },
          # Crooks and Liars embed
          {
              'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -773,6 +800,18 @@ class GenericIE(InfoExtractor):
                  # rtmpe downloads
                  'skip_download': True,
              }
+        },
+        # Brightcove URL in single quotes
+        {
+            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+            'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+            'info_dict': {
+                'id': '4255764656001',
+                'ext': 'mp4',
+                'title': 'SN Presents: Russell Martin, World Citizen',
+                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+                'uploader': 'Rogers Sportsnet',
+            },
          }
      ]
  
@@ -894,7 +933,7 @@ class GenericIE(InfoExtractor):
              force_videoid = smuggled_data['force_videoid']
              video_id = force_videoid
          else:
-            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
+            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
  
          self.to_screen('%s: Requesting header' % video_id)
  
@@ -916,7 +955,9 @@ class GenericIE(InfoExtractor):
  
          full_response = None
          if head_response is False:
-            full_response = self._request_webpage(url, video_id)
+            request = compat_urllib_request.Request(url)
+            request.add_header('Accept-Encoding', '*')
+            full_response = self._request_webpage(request, video_id)
              head_response = full_response
  
          # Check for direct link to a video
@@ -927,7 +968,7 @@ class GenericIE(InfoExtractor):
                  head_response.headers.get('Last-Modified'))
              return {
                  'id': video_id,
-                'title': os.path.splitext(url_basename(url))[0],
+                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
                  'direct': True,
                  'formats': [{
                      'format_id': m.group('format_id'),
@@ -941,7 +982,17 @@ class GenericIE(InfoExtractor):
              self._downloader.report_warning('Falling back on generic information extractor.')
  
          if not full_response:
-            full_response = self._request_webpage(url, video_id)
+            request = compat_urllib_request.Request(url)
+            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+            # making it impossible to download only a chunk of the file (yet we need only 512kB to
+            # test whether it's HTML or not). With youtube-dl's default Accept-Encoding
+            # this will always result in downloading the whole file, which is not desirable.
+            # Therefore for the extraction pass we have to override Accept-Encoding to any in order
+            # to accept raw bytes and be able to download only a chunk.
+            # It would probably be better to solve this by checking Content-Type for application/octet-stream
+            # after the HEAD request finishes, but it is not clear whether we can rely on this.
+            request.add_header('Accept-Encoding', '*')
+            full_response = self._request_webpage(request, video_id)
  
          # Maybe it's a direct link to a video?
          # Be careful not to download the whole thing!
@@ -953,7 +1004,7 @@ class GenericIE(InfoExtractor):
                  head_response.headers.get('Last-Modified'))
              return {
                  'id': video_id,
-                'title': os.path.splitext(url_basename(url))[0],
+                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
                  'direct': True,
                  'url': url,
                  'upload_date': upload_date,
@@ -1033,7 +1084,7 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded rtl.nl player
          matches = re.findall(
-            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
+            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
              webpage)
          if matches:
              return _playlist_from_matches(matches, ie='RtlNl')
@@ -1261,11 +1312,27 @@ class GenericIE(InfoExtractor):
          if rutv_url:
              return self.url_result(rutv_url, 'RUTV')
  
+        # Look for embedded TVC player
+        tvc_url = TVCIE._extract_url(webpage)
+        if tvc_url:
+            return self.url_result(tvc_url, 'TVC')
+
          # Look for embedded SportBox player
          sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
          if sportbox_urls:
              return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
  
+        # Look for embedded PornHub player
+        pornhub_url = PornHubIE._extract_url(webpage)
+        if pornhub_url:
+            return self.url_result(pornhub_url, 'PornHub')
+
+        # Look for embedded Tvigle player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Tvigle')
+
          # Look for embedded TED player
          mobj = re.search(
              r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py

index fe5d95e2c9cad488f342233e7ebfd52e42a86de3..d692ea79ab493174038c9649445e6a592a86687c 100644 (file)
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..compat import compat_urlparse
  from ..utils import (
      int_or_none,
      js_to_json,
@@ -12,7 +13,7 @@ from ..utils import (
  
  
  class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
  
      _TESTS = [{
          'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -34,7 +35,8 @@ class ImgurIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            compat_urlparse.urljoin(url, video_id), video_id)
  
          width = int_or_none(self._search_regex(
              r'<param name="width" value="([0-9]+)"',
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py

index b107557880345157fcf66cb7e6e1b4a92b25af2e..3d78f78c46d1ad004339bc33ebcb09d1286e5092 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    limit_length,
+)
  
  
  class InstagramIE(InfoExtractor):
@@ -100,11 +103,13 @@ class InstagramUserIE(InfoExtractor):
                  thumbnails_el = it.get('images', {})
                  thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
  
-                title = it.get('caption', {}).get('text', it['id'])
+                # In some cases caption is null, which corresponds to None
+                # in python. As a result, it.get('caption', {}) gives None
+                title = (it.get('caption') or {}).get('text', it['id'])
  
                  entries.append({
                      'id': it['id'],
-                    'title': title,
+                    'title': limit_length(title, 80),
                      'formats': formats,
                      'thumbnail': thumbnail,
                      'webpage_url': it.get('link'),
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py

index 8529bedfc0ab283790e74144bc9d570df19dc4b3..821c8ec109236b787b9afa2985e450ff8a647595 100644 (file)
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -11,11 +11,12 @@ from ..compat import (
  )
  from ..utils import (
      ExtractorError,
+    remove_end,
  )
  
  
  class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
  
      _TESTS = [{
          'url': 'http://play.iprima.cz/particka/particka-92',
@@ -23,7 +24,7 @@ class IPrimaIE(InfoExtractor):
              'id': '39152',
              'ext': 'flv',
              'title': 'Partička (92)',
-            'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
+            'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
              'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
          },
          'params': {
@@ -35,13 +36,14 @@ class IPrimaIE(InfoExtractor):
              'id': '9718337',
              'ext': 'flv',
              'title': 'Tchibo Partička - Jarní móda',
-            'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
              'thumbnail': 're:^http:.*\.jpg$',
          },
          'params': {
              'skip_download': True,  # requires rtmpdump
          },
-        'skip': 'Do not have permission to access this page',
+    }, {
+        'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -102,8 +104,10 @@ class IPrimaIE(InfoExtractor):
  
          return {
              'id': real_id,
-            'title': self._og_search_title(webpage),
+            'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
              'thumbnail': self._og_search_thumbnail(webpage),
              'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._search_regex(
+                r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
+                webpage, 'description', default=None),
          }
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py

new file mode 100644 (file)

index 0000000..9106dd0
--- /dev/null
+++ b/youtube_dl/extractor/iqiyi.py
@@ -0,0 +1,296 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import math
+import os.path
+import random
+import re
+import time
+import uuid
+import zlib
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    ExtractorError,
+    url_basename,
+)
+
+
+class IqiyiIE(InfoExtractor):
+    """Information extractor for iqiyi.com ``v_*.html`` video pages.
+
+    Extraction downloads the video page, reads the tvid, video id and SWF
+    player URL from it, obtains the encryption key from the SWF (cached per
+    SWF file name), queries the VMS API and finally resolves one URL per
+    segment and format. Videos with several segments are returned as a
+    'multi_video' result.
+    """
+    IE_NAME = 'iqiyi'
+
+    # The dot in the host name is escaped so that it only matches a literal '.'
+    _VALID_URL = r'http://(?:www\.)iqiyi\.com/v_.+?\.html'
+
+    _TESTS = [{
+        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
+        'md5': '2cb594dc2781e6c941a110d8f358118b',
+        'info_dict': {
+            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
+            'title': '美国德州空中惊现奇异云团 酷似UFO',
+            'ext': 'f4v',
+        }
+    }, {
+        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
+        'info_dict': {
+            'id': 'e3f585b550a280af23c98b6cb2be19fb',
+            'title': '名侦探柯南第752集',
+        },
+        'playlist': [{
+            'md5': '7e49376fecaffa115d951634917fe105',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '41b75ba13bb7ac0e411131f92bc4f6ca',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '0cee1dd0a3d46a83e71e2badeae2aab0',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '4f8ad72373b0c491b582e7c196b0b1f9',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': 'd89ad028bcfad282918e8098e811711d',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '9cb1e5c95da25dff0660c32ae50903b7',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '155116e0ff1867bbc9b98df294faabc9',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }, {
+            'md5': '53f5db77622ae14fa493ed2a278a082b',
+            'info_dict': {
+                'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
+                'ext': 'f4v',
+                'title': '名侦探柯南第752集',
+            },
+        }],
+    }]
+
+    # (bid, format_id) pairs; bids 6-9 deliberately have no format here,
+    # so get_format() returns None for them.
+    _FORMATS_MAP = [
+        ('1', 'h6'),
+        ('2', 'h5'),
+        ('3', 'h4'),
+        ('4', 'h3'),
+        ('5', 'h2'),
+        ('10', 'h1'),
+    ]
+
+    def construct_video_urls(self, data, video_id, _uuid):
+        """Resolve the segment URLs of every supported format.
+
+        data is the 'data' member of the VMS API response, video_id is only
+        used for progress messages and _uuid is sent as the 'su' parameter.
+
+        Returns a dict mapping a format id from _FORMATS_MAP to a list of
+        (video_url, filesize) tuples, one tuple per segment.
+        """
+        def do_xor(x, y):
+            # The XOR key rotates with the position of the character
+            a = y % 3
+            if a == 1:
+                return x ^ 121
+            if a == 2:
+                return x ^ 72
+            return x ^ 103
+
+        def get_encode_code(l):
+            # l is a '-'-separated list of hex codes; every code is XORed
+            # with a position dependent key and the result is reversed
+            b = l.split('-')
+            c = len(b)
+            s = ''
+            for i in range(c - 1, -1, -1):
+                s += chr(do_xor(int(b[c - i - 1], 16), i))
+            return s[::-1]
+
+        def get_path_key(x, format_id, segment_index):
+            mg = ')(*&^flash@#$%a'
+            tm = self._download_json(
+                'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
+                note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
+            )['t']
+            # The key is derived from the server time in 600 second steps
+            t = str(int(math.floor(int(tm) / (600.0))))
+            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
+
+        video_urls_dict = {}
+        for format_item in data['vp']['tkl'][0]['vs']:
+            if not 0 < int(format_item['bid']) <= 10:
+                continue
+            format_id = self.get_format(format_item['bid'])
+            if format_id is None:
+                # A bid in range but missing from _FORMATS_MAP (6-9) would
+                # otherwise end up as a None key and break get_bid() later
+                continue
+
+            video_urls = []
+
+            video_urls_info = format_item['fs']
+            if not format_item['fs'][0]['l'].startswith('/'):
+                t = get_encode_code(format_item['fs'][0]['l'])
+                if t.endswith('mp4'):
+                    video_urls_info = format_item['flvs']
+
+            for segment_index, segment in enumerate(video_urls_info):
+                vl = segment['l']
+                if not vl.startswith('/'):
+                    # Paths that do not start with '/' are obfuscated
+                    vl = get_encode_code(vl)
+                key = get_path_key(
+                    vl.split('/')[-1].split('.')[0], format_id, segment_index)
+                filesize = segment['b']
+                # The key goes in front of the last component of the base URL
+                base_url = data['vp']['du'].split('/')
+                base_url.insert(-1, key)
+                base_url = '/'.join(base_url)
+                param = {
+                    'su': _uuid,
+                    'qyid': uuid.uuid4().hex,
+                    'client': '',
+                    'z': '',
+                    'bt': '',
+                    'ct': '',
+                    'tn': str(int(time.time()))
+                }
+                api_video_url = base_url + vl + '?' + \
+                    compat_urllib_parse.urlencode(param)
+                js = self._download_json(
+                    api_video_url, video_id,
+                    note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
+                video_url = js['l']
+                video_urls.append(
+                    (video_url, filesize))
+
+            video_urls_dict[format_id] = video_urls
+        return video_urls_dict
+
+    def get_format(self, bid):
+        """Return the format id mapped to bid in _FORMATS_MAP, or None."""
+        matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
+        return matched_format_ids[0] if matched_format_ids else None
+
+    def get_bid(self, format_id):
+        """Return the bid (a string) mapped to format_id in _FORMATS_MAP, or None."""
+        matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
+        return matched_bids[0] if matched_bids else None
+
+    def get_raw_data(self, tvid, video_id, enc_key, _uuid):
+        """Query the VMS API and return its decoded JSON response.
+
+        The request is signed with md5(enc_key + timestamp + tvid).
+        """
+        tm = str(int(time.time()))
+        param = {
+            'key': 'fvip',
+            'src': hashlib.md5(b'youtube-dl').hexdigest(),
+            'tvId': tvid,
+            'vid': video_id,
+            'vinfo': 1,
+            'tm': tm,
+            'enc': hashlib.md5(
+                (enc_key + tm + tvid).encode('utf8')).hexdigest(),
+            'qyid': _uuid,
+            'tn': random.random(),
+            'um': 0,
+            'authkey': hashlib.md5(
+                (tm + tvid).encode('utf8')).hexdigest()
+        }
+
+        api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
+            compat_urllib_parse.urlencode(param)
+        return self._download_json(api_url, video_id)
+
+    def get_enc_key(self, swf_url, video_id):
+        """Return the encryption key embedded in the SWF player.
+
+        The key is cached under the SWF file name (without extension), so the
+        player is only downloaded when no cached key exists for that name.
+        """
+        filename, _ = os.path.splitext(url_basename(swf_url))
+        enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename)
+        if enc_key_json is not None:
+            return enc_key_json[0]
+
+        req = self._request_webpage(
+            swf_url, video_id, note='download swf content')
+        cn = req.read()
+        # The zlib stream starts after the first 8 bytes
+        # (presumably the SWF header - TODO confirm)
+        cn = zlib.decompress(cn[8:])
+        # '\\$' spells out the backslash that the former '\$' only produced
+        # through an invalid escape sequence; the pattern bytes are unchanged
+        pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\\$&vv')
+        enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
+
+        self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key])
+
+        return enc_key
+
+    def _real_extract(self, url):
+        """Extract the video at url.
+
+        Returns a single video info dict when the video consists of one
+        segment and a 'multi_video' dict with one entry per segment otherwise.
+        Raises ExtractorError when the API reports an error, the video has no
+        track list or none of the offered formats is supported.
+        """
+        webpage = self._download_webpage(
+            url, 'temp_id', note='download video page')
+        tvid = self._search_regex(
+            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
+        video_id = self._search_regex(
+            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+        swf_url = self._search_regex(
+            r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
+        _uuid = uuid.uuid4().hex
+
+        enc_key = self.get_enc_key(swf_url, video_id)
+
+        raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
+
+        if raw_data['code'] != 'A000000':
+            raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
+
+        if not raw_data['data']['vp']['tkl']:
+            raise ExtractorError('No support iQiyi VIP video')
+
+        data = raw_data['data']
+
+        title = data['vi']['vn']
+
+        # Map every supported format id to its list of (url, filesize) segments
+        video_urls_dict = self.construct_video_urls(
+            data, video_id, _uuid)
+
+        # Regroup per segment: entry i collects segment i of every format
+        entries = []
+        for format_id in video_urls_dict:
+            video_urls = video_urls_dict[format_id]
+            for i, video_url_info in enumerate(video_urls):
+                if len(entries) < i + 1:
+                    entries.append({'formats': []})
+                entries[i]['formats'].append(
+                    {
+                        'url': video_url_info[0],
+                        'filesize': video_url_info[-1],
+                        'format_id': format_id,
+                        'preference': int(self.get_bid(format_id))
+                    }
+                )
+
+        if not entries:
+            # Fail with a proper extractor error instead of an IndexError below
+            raise ExtractorError('Unable to find any supported video format')
+
+        for i, entry in enumerate(entries):
+            self._sort_formats(entry['formats'])
+            entry.update(
+                {
+                    'id': '%s_part%d' % (video_id, i + 1),
+                    'title': title,
+                }
+            )
+
+        if len(entries) > 1:
+            info = {
+                '_type': 'multi_video',
+                'id': video_id,
+                'title': title,
+                'entries': entries,
+            }
+        else:
+            # A single segment is returned as a plain video with the bare id
+            info = entries[0]
+            info['id'] = video_id
+            info['title'] = title
+
+        return info
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py

index 99a1361f844c15520c842cd9fffa1e5c2e9b6974..bc226fa67c064b991674a510b1eba54d40dc67e0 100644 (file)
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
  from ..utils import (
      determine_ext,
      float_or_none,
@@ -30,7 +31,7 @@ class IzleseneIE(InfoExtractor):
                  'description': 'md5:253753e2655dde93f59f74b572454f6d',
                  'thumbnail': 're:^http://.*\.jpg',
                  'uploader_id': 'pelikzzle',
-                'timestamp': 1404302298,
+                'timestamp': int,
                  'upload_date': '20140702',
                  'duration': 95.395,
                  'age_limit': 0,
@@ -46,7 +47,7 @@ class IzleseneIE(InfoExtractor):
                  'description': 'Tarkan Dortmund 2006 Konseri',
                  'thumbnail': 're:^http://.*\.jpg',
                  'uploader_id': 'parlayankiz',
-                'timestamp': 1163322193,
+                'timestamp': int,
                  'upload_date': '20061112',
                  'duration': 253.666,
                  'age_limit': 0,
@@ -67,9 +68,9 @@ class IzleseneIE(InfoExtractor):
  
          uploader = self._html_search_regex(
              r"adduserUsername\s*=\s*'([^']+)';",
-            webpage, 'uploader', fatal=False, default='')
+            webpage, 'uploader', fatal=False)
          timestamp = parse_iso8601(self._html_search_meta(
-            'uploadDate', webpage, 'upload date', fatal=False))
+            'uploadDate', webpage, 'upload date'))
  
          duration = float_or_none(self._html_search_regex(
              r'"videoduration"\s*:\s*"([^"]+)"',
@@ -86,8 +87,7 @@ class IzleseneIE(InfoExtractor):
  
          # Might be empty for some videos.
          streams = self._html_search_regex(
-            r'"qualitylevel"\s*:\s*"([^"]+)"',
-            webpage, 'streams', fatal=False, default='')
+            r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
  
          formats = []
          if streams:
@@ -95,15 +95,15 @@ class IzleseneIE(InfoExtractor):
                  quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
                  formats.append({
                      'format_id': '%sp' % quality if quality else 'sd',
-                    'url': url,
+                    'url': compat_urllib_parse_unquote(url),
                      'ext': ext,
                  })
          else:
              stream_url = self._search_regex(
-                r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
+                r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
              formats.append({
                  'format_id': 'sd',
-                'url': stream_url,
+                'url': compat_urllib_parse_unquote(stream_url),
                  'ext': ext,
              })
  
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py

index 7d4b57056509383fdc082a68c1650f38dc258763..1d391e69ff7e0aba1b78ae5e32792b2dca839943 100644 (file)
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@@ -28,6 +28,14 @@ class KickStarterIE(InfoExtractor):
              'uploader': 'Pebble Technology',
              'title': 'Pebble iOS Notifications',
          }
+    }, {
+        'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
+        'info_dict': {
+            'id': '1420158244',
+            'ext': 'mp4',
+            'title': 'Power Drive 2000',
+        },
+        'expected_warnings': ['OpenGraph description'],
      }]
  
      def _real_extract(self, url):
@@ -48,10 +56,15 @@ class KickStarterIE(InfoExtractor):
                  'title': title,
              }
  
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+        if thumbnail is None:
+            thumbnail = self._html_search_regex(
+                r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"',
+                webpage, 'thumbnail image', fatal=False)
          return {
              'id': video_id,
              'url': video_url,
              'title': title,
              'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': thumbnail,
          }
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py

index 42cb6e35f821256e90c8eef4f176812e3e0f42d0..f8cbca7b36afab1890b71806d6761bbe67d7d924 100644 (file)
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
  from ..utils import (
      determine_ext,
      int_or_none,
+    remove_end,
      unified_strdate,
      ExtractorError,
  )
@@ -39,7 +40,6 @@ class LifeNewsIE(InfoExtractor):
              'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
              'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
              'upload_date': '20150402',
-            'uploader': 'embed.life.ru',
          }
      }, {
          'url': 'http://lifenews.ru/news/153461',
@@ -50,7 +50,6 @@ class LifeNewsIE(InfoExtractor):
              'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
              'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
              'upload_date': '20150505',
-            'uploader': 'embed.life.ru',
          }
      }, {
          'url': 'http://lifenews.ru/video/13035',
@@ -72,20 +71,20 @@ class LifeNewsIE(InfoExtractor):
          if not videos and not iframe_link:
              raise ExtractorError('No media links available for %s' % video_id)
  
-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
+        title = remove_end(
+            self._og_search_title(webpage),
+            ' - Первый по срочным новостям — LIFE | NEWS')
  
          description = self._og_search_description(webpage)
  
          view_count = self._html_search_regex(
              r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
          comment_count = self._html_search_regex(
-            r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
+            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
+            webpage, 'comment count', fatal=False)
  
          upload_date = self._html_search_regex(
-            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
+            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
          if upload_date is not None:
              upload_date = unified_strdate(upload_date)
  
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py

index 35822067f908f0567e8dcb8c9c8265df4d3421c2..857edfde263196d9bf2811568cc9f9de90eed92b 100644 (file)
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -40,6 +40,17 @@ class LiveLeakIE(InfoExtractor):
              'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
              'age_limit': 18,
          }
+    }, {
+        # Covers https://github.com/rg3/youtube-dl/pull/5983
+        'url': 'http://www.liveleak.com/view?i=801_1409392012',
+        'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
+        'info_dict': {
+            'id': '801_1409392012',
+            'ext': 'mp4',
+            'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
+            'uploader': 'bony333',
+            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
+        }
      }]
  
      def _real_extract(self, url):
@@ -85,7 +96,10 @@ class LiveLeakIE(InfoExtractor):
              'url': s['file'],
          } for i, s in enumerate(sources)]
          for i, s in enumerate(sources):
-            orig_url = s['file'].replace('.h264_base.mp4', '')
+            # Removing '.h264_*.mp4' gives the raw video, which is essentially
+            # the same video without the LiveLeak logo at the top (see
+            # https://github.com/rg3/youtube-dl/pull/4768)
+            orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
              if s['file'] != orig_url:
                  formats.append({
                      'format_id': 'original-%s' % i,
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py

index c10405f04d3cc1b3e89004029b7502112e9baa29..925967753bd12816005b5ed8929f0438e9ec0214 100644 (file)
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -6,6 +6,7 @@ import re
  from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
+    compat_urlparse,
  )
  from ..utils import (
      ExtractorError,
@@ -16,7 +17,7 @@ from ..utils import (
  class NaverIE(InfoExtractor):
      _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://tvcast.naver.com/v/81652',
          'info_dict': {
              'id': '81652',
@@ -25,7 +26,18 @@ class NaverIE(InfoExtractor):
              'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
              'upload_date': '20130903',
          },
-    }
+    }, {
+        'url': 'http://tvcast.naver.com/v/395837',
+        'md5': '638ed4c12012c458fefcddfd01f173cd',
+        'info_dict': {
+            'id': '395837',
+            'ext': 'mp4',
+            'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+            'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
+            'upload_date': '20150519',
+        },
+        'skip': 'Georestricted',
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -35,7 +47,7 @@ class NaverIE(InfoExtractor):
                           webpage)
          if m_id is None:
              m_error = re.search(
-                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+                r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
                  webpage)
              if m_error:
                  raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
@@ -58,14 +70,18 @@ class NaverIE(InfoExtractor):
          formats = []
          for format_el in urls.findall('EncodingOptions/EncodingOption'):
              domain = format_el.find('Domain').text
+            uri = format_el.find('uri').text
              f = {
-                'url': domain + format_el.find('uri').text,
+                'url': compat_urlparse.urljoin(domain, uri),
                  'ext': 'mp4',
                  'width': int(format_el.find('width').text),
                  'height': int(format_el.find('height').text),
              }
              if domain.startswith('rtmp'):
+                # urlparse does not support custom schemes
+                # https://bugs.python.org/issue18828
                  f.update({
+                    'url': domain + uri,
                      'ext': 'flv',
                      'rtmp_protocol': '1',  # rtmpt
                  })
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py

index 2684dd250aa65e22903612f4a1780fc8f701296a..dc54634a58e440fc70ae9bcb3e7d5781981b2b1e 100644 (file)
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -19,7 +19,7 @@ class NFLIE(InfoExtractor):
      _VALID_URL = r'''(?x)https?://
          (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
          (?:.+?/)*
-        (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
+        (?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
      _TESTS = [
          {
              'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
@@ -58,6 +58,10 @@ class NFLIE(InfoExtractor):
                  'upload_date': '20150202',
              },
          },
+        {
+            'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
+            'only_matching': True,
+        }
      ]
  
      @staticmethod
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py

index 3cecebf95a4acd0a388da5984aebc2822e79b32c..0f8aa5adad5b2247621ce00249f3bd03a33a104a 100644 (file)
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -182,7 +182,6 @@ class NiconicoIE(InfoExtractor):
          extension = xpath_text(video_info, './/movie_type')
          if not extension:
              extension = determine_ext(video_real_url)
-        video_format = extension.upper()
  
          thumbnail = (
              xpath_text(video_info, './/thumbnail_url') or
@@ -241,7 +240,7 @@ class NiconicoIE(InfoExtractor):
              'url': video_real_url,
              'title': title,
              'ext': extension,
-            'format': video_format,
+            'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
              'thumbnail': thumbnail,
              'description': description,
              'uploader': uploader,
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py

index 664dc81d47ce7af613636022f4e540dffd67f8b6..5bbd2dcf66294f5f0e21b6aae000f9ddecd5c051 100644 (file)
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -166,6 +166,10 @@ class NocoIE(InfoExtractor):
          self._sort_formats(formats)
  
          timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
+
+        if timestamp is not None and timestamp < 0:
+            timestamp = None
+
          uploader = show.get('partner_name')
          uploader_id = show.get('partner_key')
          duration = float_or_none(show.get('duration_ms'), 1000)
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py

new file mode 100644 (file)

index 0000000..3f9c776
--- /dev/null
+++ b/youtube_dl/extractor/nova.py
@@ -0,0 +1,199 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    unified_strdate,
+)
+
+
+class NovaIE(InfoExtractor):
+    """Extractor for videos on the *.nova.cz sites listed in IE_DESC."""
+
+    IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
+    # Raw literal: the pattern contains regex escapes such as \. that are
+    # not valid Python string escapes.
+    _VALID_URL = r'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+    _TESTS = [{
+        'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
+        'info_dict': {
+            'id': '1608920',
+            'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
+            'ext': 'flv',
+            'title': 'Duel: Michal Hrdlička a Petr Suchoň',
+            'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
+            'thumbnail': r're:^https?://.*\.(?:jpg)',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
+        'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
+        'info_dict': {
+            'id': '1757139',
+            'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
+            'ext': 'mp4',
+            'title': 'Podzemní nemocnice v pražské Krči',
+            'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
+            'thumbnail': r're:^https?://.*\.(?:jpg)',
+        }
+    }, {
+        'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
+        'info_dict': {
+            'id': '1756825',
+            'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
+            'ext': 'flv',
+            'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
+            'description': 'md5:dc24e50be5908df83348e50d1431295e',  # Make sure this description is clean of html tags
+            'thumbnail': r're:^https?://.*\.(?:jpg)',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
+        'info_dict': {
+            'id': '1756858',
+            'ext': 'flv',
+            'title': 'Televizní noviny - 30. 5. 2015',
+            'thumbnail': r're:^https?://.*\.(?:jpg)',
+            'upload_date': '20150530',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+        'info_dict': {
+            'id': '1753621',
+            'ext': 'mp4',
+            'title': 'Zaklínač 3: Divoký hon',
+            'description': 're:.*Pokud se stejně jako my nemůžete.*',
+            'thumbnail': r're:https?://.*\.jpg(\?.*)?',
+            'upload_date': '20150521',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the video embedded in the page at url.
+
+        The numeric media id is scraped from the page, the player config
+        JSON is downloaded, and the media URL found in it becomes the only
+        format (RTMP URLs are split into url, app and play path).
+        """
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        site = mobj.group('site')
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r"(?:media|video_id)\s*:\s*'(\d+)'",
+             r'media=(\d+)',
+             r'id="article_video_(\d+)"',
+             r'id="player_(\d+)"'],
+            webpage, 'video id')
+
+        config_url = self._search_regex(
+            r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+            webpage, 'config url', default=None)
+
+        if not config_url:
+            # No config URL in the page: rebuild it from a site id found in
+            # the page or, failing that, from the per-site table below.
+            DEFAULT_SITE_ID = '23000'
+            SITES = {
+                'tvnoviny': DEFAULT_SITE_ID,
+                'novaplus': DEFAULT_SITE_ID,
+                'vymena': DEFAULT_SITE_ID,
+                'krasna': DEFAULT_SITE_ID,
+                'fanda': '30',
+                'tn': '30',
+                'doma': '30',
+            }
+
+            site_id = self._search_regex(
+                r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
+
+            config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
+                          % (site_id, video_id))
+
+        # Only the span from the first '{' to the last '}' of the response is
+        # parsed as JSON; anything around it is discarded.
+        config = self._download_json(
+            config_url, display_id,
+            'Downloading config JSON',
+            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+        mediafile = config['mediafile']
+        video_url = mediafile['src']
+
+        m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
+        if m:
+            formats = [{
+                'url': m.group('url'),
+                'app': m.group('app'),
+                'play_path': m.group('playpath'),
+                # NOTE(review): other RTMP extractors in this code base use the
+                # 'player_url' key for the SWF location - confirm this key.
+                'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
+                'ext': 'flv',
+            }]
+        else:
+            formats = [{
+                'url': video_url,
+            }]
+        self._sort_formats(formats)
+
+        # Guard against a missing or null 'meta' entry before falling back
+        # to the title found by _og_search_title.
+        title = (mediafile.get('meta') or {}).get('title') or self._og_search_title(webpage)
+        description = clean_html(self._og_search_description(webpage, default=None))
+        thumbnail = config.get('poster')
+
+        # An upload date is only looked up for two sites: in the URL slug
+        # for novaplus and in the page markup for fanda.
+        if site == 'novaplus':
+            upload_date = unified_strdate(self._search_regex(
+                r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+        elif site == 'fanda':
+            upload_date = unified_strdate(self._search_regex(
+                r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+        else:
+            upload_date = None
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py

new file mode 100644 (file)

index 0000000..173e46c
--- /dev/null
+++ b/youtube_dl/extractor/nowtv.py
@@ -0,0 +1,214 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_iso8601,
+    parse_duration,
+    remove_start,
+)
+
+
+class NowTVIE(InfoExtractor):
+    """Extractor for nowtv.de player pages, using the api.nowtv.de JSON API."""
+
+    _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player'
+
+    _TESTS = [{
+        # rtl
+        'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player',
+        'info_dict': {
+            'id': '203519',
+            'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
+            'ext': 'mp4',
+            'title': 'Die neuen Bauern und eine Hochzeit',
+            'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1432580700,
+            'upload_date': '20150525',
+            'duration': 2786,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # rtl2
+        'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player',
+        'info_dict': {
+            'id': '203481',
+            'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
+            'ext': 'mp4',
+            'title': 'Berlin - Tag & Nacht (Folge 934)',
+            'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1432666800,
+            'upload_date': '20150526',
+            'duration': 2641,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # rtlnitro
+        'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player',
+        'info_dict': {
+            'id': '165780',
+            'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
+            'ext': 'mp4',
+            'title': 'Hals- und Beinbruch',
+            'description': 'md5:b50d248efffe244e6f56737f0911ca57',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1432415400,
+            'upload_date': '20150523',
+            'duration': 2742,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # superrtl
+        'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player',
+        'info_dict': {
+            'id': '99205',
+            'display_id': 'medicopter-117/angst',
+            'ext': 'mp4',
+            'title': 'Angst!',
+            'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1222632900,
+            'upload_date': '20080928',
+            'duration': 3025,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # ntv
+        'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player',
+        'info_dict': {
+            'id': '203521',
+            'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
+            'ext': 'mp4',
+            'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
+            'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1432751700,
+            'upload_date': '20150527',
+            'duration': 1083,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # vox
+        'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player',
+        'info_dict': {
+            'id': '128953',
+            'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
+            'ext': 'mp4',
+            'title': "Büro-Fall / Chihuahua 'Joel'",
+            'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1432408200,
+            'upload_date': '20150523',
+            'duration': 3092,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the episode behind a nowtv.de player URL.
+
+        Metadata and the list of media files come from the api.nowtv.de
+        movies endpoint; every listed file becomes one HLS format.
+
+        Raises ExtractorError when the API lists no media files (geo
+        restricted, not free, or simply absent) or names an unknown station.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        station = mobj.group('station')
+
+        info = self._download_json(
+            'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id,
+            display_id)
+
+        video_id = compat_str(info['id'])
+
+        files = info['files']
+        if not files:
+            if info.get('geoblocked', False):
+                raise ExtractorError(
+                    'Video %s is not available from your location due to geo restriction' % video_id,
+                    expected=True)
+            if not info.get('free', True):
+                raise ExtractorError(
+                    'Video %s is not available for free' % video_id, expected=True)
+            # Without this the loop below would crash indexing the empty
+            # files value instead of reporting a proper extraction error.
+            raise ExtractorError('No media files available for %s' % video_id)
+
+        f = info.get('format') or {}
+        station = f.get('station') or station
+
+        # Maps station names to the host label used in the HLS URL. The
+        # station parsed from the page URL is the fallback and spells nitro
+        # as 'rtlnitro', so both spellings are listed.
+        STATIONS = {
+            'rtl': 'rtlnow',
+            'rtl2': 'rtl2now',
+            'vox': 'voxnow',
+            'nitro': 'rtlnitronow',
+            'rtlnitro': 'rtlnitronow',
+            'ntv': 'n-tvnow',
+            'superrtl': 'superrtlnow'
+        }
+        station_host = STATIONS.get(station)
+        if not station_host:
+            raise ExtractorError('Unknown station %s for video %s' % (station, video_id))
+
+        formats = []
+        for item in files['items']:
+            item_path = remove_start(item['path'], '/')
+            tbr = int_or_none(item['bitrate'])
+            m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (station_host, item_path)
+            # Insert 'videos/' after every 'now/' in the URL (NOTE(review):
+            # presumably only the '<station>now/' path prefix - confirm).
+            m3u8_url = m3u8_url.replace('now/', 'now/videos/')
+            formats.append({
+                'url': m3u8_url,
+                'format_id': '%s-%sk' % (item['id'], tbr),
+                'ext': 'mp4',
+                'tbr': tbr,
+            })
+        self._sort_formats(formats)
+
+        title = info['title']
+        description = info.get('articleLong') or info.get('articleShort')
+        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+        duration = parse_duration(info.get('duration'))
+        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py

index fbc521d1aae02077ae62c5cd0a6c5f9cdcff014a..6c7149fe3859732978d34356f8c18aca14e74f63 100644 (file)
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -2,6 +2,7 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
+from ..compat import compat_urllib_parse
  from ..utils import (
      unified_strdate,
      int_or_none,
@@ -11,8 +12,9 @@ from ..utils import (
  
  
  class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
      _TESTS = [{
+        # metadata in JSON
          'url': 'http://ok.ru/video/20079905452',
          'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
          'info_dict': {
@@ -20,11 +22,22 @@ class OdnoklassnikiIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'Культура меняет нас (прекрасный ролик!))',
              'duration': 100,
-            'upload_date': '20141207',
              'uploader_id': '330537914540',
              'uploader': 'Виталий Добровольский',
              'like_count': int,
-            'age_limit': 0,
+        },
+    }, {
+        # metadataUrl
+        'url': 'http://ok.ru/video/63567059965189-0',
+        'md5': '9676cf86eff5391d35dea675d224e131',
+        'info_dict': {
+            'id': '63567059965189-0',
+            'ext': 'mp4',
+            'title': 'Девушка без комплексов ...',
+            'duration': 191,
+            'uploader_id': '534380003155',
+            'uploader': 'Андрей Мещанинов',
+            'like_count': int,
          },
      }, {
          'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
@@ -34,14 +47,23 @@ class OdnoklassnikiIE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            'http://ok.ru/video/%s' % video_id, video_id)
  
          player = self._parse_json(
              unescapeHTML(self._search_regex(
                  r'data-attributes="([^"]+)"', webpage, 'player')),
              video_id)
  
-        metadata = self._parse_json(player['flashvars']['metadata'], video_id)
+        flashvars = player['flashvars']
+
+        metadata = flashvars.get('metadata')
+        if metadata:
+            metadata = self._parse_json(metadata, video_id)
+        else:
+            metadata = self._download_json(
+                compat_urllib_parse.unquote(flashvars['metadataUrl']),
+                video_id, 'Downloading metadata JSON')
  
          movie = metadata['movie']
          title = movie['title']
@@ -53,11 +75,11 @@ class OdnoklassnikiIE(InfoExtractor):
          uploader = author.get('name')
  
          upload_date = unified_strdate(self._html_search_meta(
-            'ya:ovs:upload_date', webpage, 'upload date'))
+            'ya:ovs:upload_date', webpage, 'upload date', default=None))
  
          age_limit = None
          adult = self._html_search_meta(
-            'ya:ovs:adult', webpage, 'age limit')
+            'ya:ovs:adult', webpage, 'age limit', default=None)
          if adult:
              age_limit = 18 if adult == 'true' else 0
  
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py

index f179ea2008636f061c6a4cdad6fc69841a291076..6cdc2638b4930dc92835d71f673b560dea99022d 100644 (file)
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -87,7 +87,7 @@ class PatreonIE(InfoExtractor):
              r'<div class="attach"><a target="_blank" href="([^"]+)">',
              webpage, 'attachment URL', default=None)
          embed = self._html_search_regex(
-            r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+            r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
              webpage, 'embedded URL', default=None)
  
          if attach_fn is not None:
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py

new file mode 100644 (file)

index 0000000..72d1b27
--- /dev/null
+++ b/youtube_dl/extractor/porn91.py
@@ -0,0 +1,89 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from ..compat import compat_urllib_parse
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+    ExtractorError,
+)
+
+
+class Porn91IE(InfoExtractor):
+    """Extractor for 91porn.com video pages addressed by their viewkey."""
+
+    IE_NAME = '91porn'
+    _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
+
+    _TEST = {
+        'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+        'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
+        'info_dict': {
+            'id': '7e42283b4f5ab36da134',
+            'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
+            'ext': 'mp4',
+            'duration': 431,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for a 91porn video page.
+
+        The page supplies the title and three player variables (file id,
+        security code, max vid); those are sent to getfile.php, whose
+        response carries the actual media URL.
+
+        Raises ExtractorError if the page shows the notice that the daily
+        viewing limit for guests has been reached.
+        """
+        video_id = self._match_id(url)
+        # Always fetch the canonical page with the language cookie set to
+        # cn_CN (presumably because the markers searched below are Chinese).
+        url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
+        self._set_cookie('91porn.com', 'language', 'cn_CN')
+        webpage = self._download_webpage(url, video_id, 'get HTML content')
+
+        if '作为游客，你每天只可观看10个视频' in webpage:
+            raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
+
+        title = self._search_regex(
+            r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
+        title = title.replace('\n', '')
+
+        # get real url
+        # The dot in "so.addVariable" is escaped so that it matches only a
+        # literal dot rather than any character.
+        file_id = self._search_regex(
+            r'so\.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
+        sec_code = self._search_regex(
+            r'so\.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
+        max_vid = self._search_regex(
+            r'so\.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
+        url_params = compat_urllib_parse.urlencode({
+            'VID': file_id,
+            'mp4': '1',
+            'seccode': sec_code,
+            'max_vid': max_vid,
+        })
+        info_cn = self._download_webpage(
+            'http://91porn.com/getfile.php?' + url_params, video_id,
+            'get real video url')
+        video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
+
+        duration = parse_duration(self._search_regex(
+            r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
+
+        comment_count = int_or_none(self._search_regex(
+            r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'duration': duration,
+            'comment_count': comment_count,
+            # Adult-only site
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 0c8b731cf47267568e43ccd09ff21f1683b4d992..8172bc9976755f7cc4361e1f6dba8d9d7b53d5fd 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -19,8 +19,8 @@ from ..aes import (
  
  
  class PornHubIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
+    _TESTS = [{
          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
          'md5': '882f488fa1f0026f023f33576004a2ed',
          'info_dict': {
@@ -30,7 +30,17 @@ class PornHubIE(InfoExtractor):
              "title": "Seductive Indian beauty strips down and fingers her pink pussy",
              "age_limit": 18
          }
-    }
+    }, {
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return the first pornhub embed iframe URL in webpage, or None."""
+        mobj = re.search(  # ids are alphanumeric (cf. _VALID_URL), not just digits
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[0-9a-z]+)\1', webpage)
+        return mobj.group('url') if mobj else None
  
      def _extract_count(self, pattern, webpage, name):
          return str_to_int(self._search_regex(
@@ -39,7 +49,8 @@ class PornHubIE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        req = compat_urllib_request.Request(url)
+        req = compat_urllib_request.Request(
+            'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
          req.add_header('Cookie', 'age_verified=1')
          webpage = self._download_webpage(req, video_id)
  
@@ -71,7 +82,8 @@ class PornHubIE(InfoExtractor):
  
          video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
          if webpage.find('"encrypted":true') != -1:
-            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
+            password = compat_urllib_parse.unquote_plus(
+                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
              video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
  
          formats = []
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py

index 9688ed94898de231e6c7f1c9dc28d3779da10311..eba4dfbb39576bff355b722c997dd31e07ce370f 100644 (file)
--- a/youtube_dl/extractor/pornovoisines.py
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -34,7 +34,7 @@ class PornoVoisinesIE(InfoExtractor):
              'duration': 120,
              'view_count': int,
              'average_rating': float,
-            'categories': ['Débutante', 'Scénario', 'Sodomie'],
+            'categories': ['Débutantes', 'Scénario', 'Sodomie'],
              'age_limit': 18,
          }
      }
@@ -71,7 +71,7 @@ class PornoVoisinesIE(InfoExtractor):
          view_count = int_or_none(self._search_regex(
              r'(\d+) vues', webpage, 'view count', fatal=False))
          average_rating = self._search_regex(
-            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+            r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
          if average_rating:
              average_rating = float_or_none(average_rating.replace(',', '.'))
  
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py

index 255d4abc131519ec470ccdc2b1a64b7d38d9f44b..536a42dc88a4e17bbd039289508521d1ea13e282 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -177,6 +177,7 @@ class ProSiebenSat1IE(InfoExtractor):
          r'<header class="clearfix">\s*<h3>(.+?)</h3>',
          r'<!-- start video -->\s*<h1>(.+?)</h1>',
          r'<h1 class="att-name">\s*(.+?)</h1>',
+        r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
      ]
      _DESCRIPTION_REGEXES = [
          r'<p itemprop="description">\s*(.+?)</p>',
@@ -206,8 +207,8 @@ class ProSiebenSat1IE(InfoExtractor):
      def _extract_clip(self, url, webpage):
          clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
  
-        access_token = 'testclient'
-        client_name = 'kolibri-1.2.5'
+        access_token = 'prosieben'
+        client_name = 'kolibri-1.12.6'
          client_location = url
  
          videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
@@ -275,13 +276,17 @@ class ProSiebenSat1IE(InfoExtractor):
          for source in urls_sources:
              protocol = source['protocol']
              if protocol == 'rtmp' or protocol == 'rtmpe':
-                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
+                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url'])
                  if not mobj:
                      continue
+                path = mobj.group('path')
+                mp4colon_index = path.rfind('mp4:')
+                app = path[:mp4colon_index]
+                play_path = path[mp4colon_index:]
                  formats.append({
-                    'url': mobj.group('url'),
-                    'app': mobj.group('app'),
-                    'play_path': mobj.group('playpath'),
+                    'url': '%s/%s' % (mobj.group('url'), app),
+                    'app': app,
+                    'play_path': play_path,
                      'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                      'page_url': 'http://www.prosieben.de',
                      'vbr': fix_bitrate(source['bitrate']),
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py

index b540033e25a8c8e033f9d7f7b49d6b6ae46f755b..bafa81c21c12f15ac8162f8303f56615124b260b 100644 (file)
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -9,7 +9,6 @@ from .common import InfoExtractor
  from ..utils import (
      strip_jsonp,
      unescapeHTML,
-    js_to_json,
  )
  from ..compat import compat_urllib_request
  
@@ -19,10 +18,10 @@ class QQMusicIE(InfoExtractor):
      _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
      _TESTS = [{
          'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
-        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'md5': '9ce1c1c8445f561506d2e3cfb0255705',
          'info_dict': {
              'id': '004295Et37taLD',
-            'ext': 'm4a',
+            'ext': 'mp3',
              'title': '可惜没如果',
              'upload_date': '20141227',
              'creator': '林俊杰',
@@ -30,6 +29,12 @@ class QQMusicIE(InfoExtractor):
          }
      }]
  
+    _FORMATS = {
+        'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
+        'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
+        'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}
+    }
+
      # Reference: m_r_GetRUin() in top_player.js
      # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
      @staticmethod
@@ -69,11 +74,22 @@ class QQMusicIE(InfoExtractor):
              'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
              mid, note='Retrieve vkey', errnote='Unable to get vkey',
              transform_source=strip_jsonp)['key']
-        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        formats = []
+        for format_id, details in self._FORMATS.items():
+            formats.append({
+                'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0'
+                       % (details['prefix'], mid, details['ext'], vkey, guid),
+                'format': format_id,
+                'format_id': format_id,
+                'preference': details['preference'],
+                'abr': details.get('abr'),
+            })
+        self._sort_formats(formats)
  
          return {
              'id': mid,
-            'url': song_url,
+            'formats': formats,
              'title': song_name,
              'upload_date': publish_time,
              'creator': singer,
@@ -181,60 +197,49 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
      _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
  
      _TESTS = [{
-        'url': 'http://y.qq.com/#type=toplist&p=global_12',
+        'url': 'http://y.qq.com/#type=toplist&p=global_123',
          'info_dict': {
-            'id': 'global_12',
-            'title': 'itunes榜',
+            'id': 'global_123',
+            'title': '美国iTunes榜',
          },
          'playlist_count': 10,
      }, {
-        'url': 'http://y.qq.com/#type=toplist&p=top_6',
+        'url': 'http://y.qq.com/#type=toplist&p=top_3',
          'info_dict': {
-            'id': 'top_6',
+            'id': 'top_3',
              'title': 'QQ音乐巅峰榜·欧美',
+            'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成，集结当下最流行的欧美新歌！:更新时间：每周四22点|统'
+                           '计周期：一周（上周四至本周三）|统计对象：三个月内发行的欧美歌曲|统计数量：100首|统计算法：根据'
+                           '歌曲在一周内的有效播放次数，由高到低取前100名（同一歌手最多允许5首歌曲同时上榜）|有效播放次数：'
+                           '登录用户完整播放一首歌曲，记为一次有效播放；同一用户收听同一首歌曲，每天记录为1次有效播放'
          },
          'playlist_count': 100,
      }, {
-        'url': 'http://y.qq.com/#type=toplist&p=global_5',
+        'url': 'http://y.qq.com/#type=toplist&p=global_106',
          'info_dict': {
-            'id': 'global_5',
-            'title': '韩国mnet排行榜',
+            'id': 'global_106',
+            'title': '韩国Mnet榜',
          },
          'playlist_count': 50,
      }]
  
-    @staticmethod
-    def strip_qq_jsonp(code):
-        return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
-
      def _real_extract(self, url):
          list_id = self._match_id(url)
  
          list_type, num_id = list_id.split("_")
  
-        list_page = self._download_webpage(
-            "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
-            list_id, 'Download toplist page')
-
-        entries = []
-        if list_type == 'top':
-            jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
-        else:
-            jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id
-
          toplist_json = self._download_json(
-            jsonp_url, list_id, note='Retrieve toplist json',
-            errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
-
-        for song in toplist_json['l']:
-            s = song['s']
-            song_mid = s.split("|")[20]
-            entries.append(self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
-                song_mid))
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
+            % (list_type, num_id),
+            list_id, 'Download toplist page')
  
-        list_name = self._html_search_regex(
-            r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
-            default=None)
+        entries = [
+            self.url_result(
+                'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
+            ) for song in toplist_json['songlist']
+        ]
  
-        return self.playlist_result(entries, list_id, list_name)
+        topinfo = toplist_json.get('topinfo', {})
+        list_name = topinfo.get('ListName')
+        list_description = topinfo.get('info')
+        return self.playlist_result(entries, list_id, list_name, list_description)
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py

index 5a381d9ced41516db44d7e17120b29948a1957cb..e4215d546219bb95fe79abfb184da149148962db 100644 (file)
--- a/youtube_dl/extractor/rtbf.py
+++ b/youtube_dl/extractor/rtbf.py
@@ -21,6 +21,13 @@ class RTBFIE(InfoExtractor):
          }
      }
  
+    _QUALITIES = [
+        ('mobile', 'mobile'),
+        ('web', 'SD'),
+        ('url', 'MD'),
+        ('high', 'HD'),
+    ]
+
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
@@ -32,14 +39,21 @@ class RTBFIE(InfoExtractor):
                  r'data-video="([^"]+)"', webpage, 'data video')),
              video_id)
  
-        video_url = data.get('downloadUrl') or data.get('url')
-
          if data.get('provider').lower() == 'youtube':
+            video_url = data.get('downloadUrl') or data.get('url')
              return self.url_result(video_url, 'Youtube')
+        formats = []
+        for key, format_id in self._QUALITIES:
+            format_url = data['sources'].get(key)
+            if format_url:
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                })
  
          return {
              'id': video_id,
-            'url': video_url,
+            'formats': formats,
              'title': data['title'],
              'description': data.get('description') or data.get('subtitle'),
              'thumbnail': data.get('thumbnail'),
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py

index cfce4550ada568cfe13fae859a2bb745671074b5..41d202c289839d00de98eb0a3b1e5770eb0990f5 100644 (file)
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
      IE_NAME = 'rtl.nl'
      IE_DESC = 'rtl.nl and rtlxl.nl'
      _VALID_URL = r'''(?x)
-        https?://(www\.)?
+        https?://(?:www\.)?
          (?:
              rtlxl\.nl/\#!/[^/]+/|
-            rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
+            rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
          )
          (?P<id>[0-9a-f-]+)'''
  
@@ -43,6 +43,9 @@ class RtlNlIE(InfoExtractor):
              'upload_date': '20150215',
              'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
          }
+    }, {
+        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py

deleted file mode 100644 (file)

index 785a804..0000000
--- a/youtube_dl/extractor/rtlnow.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    clean_html,
-    unified_strdate,
-    int_or_none,
-)
-
-
-class RTLnowIE(InfoExtractor):
-    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'''(?x)
-                        (?:https?://)?
-                        (?P<url>
-                            (?P<domain>
-                                rtl-now\.rtl\.de|
-                                rtl2now\.rtl2\.de|
-                                (?:www\.)?voxnow\.de|
-                                (?:www\.)?rtlnitronow\.de|
-                                (?:www\.)?superrtlnow\.de|
-                                (?:www\.)?n-tvnow\.de)
-                            /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
-                            (?:container_id|film_id)=(?P<video_id>[0-9]+)&
-                            player=1(?:&season=[0-9]+)?(?:&.*)?
-                        )'''
-
-    _TESTS = [
-        {
-            'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
-            'info_dict': {
-                'id': '90419',
-                'ext': 'flv',
-                'title': 'Ahornallee - Folge 1 - Der Einzug',
-                'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
-                'upload_date': '20070416',
-                'duration': 1685,
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'Only works from Germany',
-        },
-        {
-            'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
-            'info_dict': {
-                'id': '69756',
-                'ext': 'flv',
-                'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
-                'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
-                'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
-                'upload_date': '20120519',
-                'duration': 1245,
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'Only works from Germany',
-        },
-        {
-            'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
-            'info_dict': {
-                'id': '13883',
-                'ext': 'flv',
-                'title': 'Voxtours - Südafrika-Reporter II',
-                'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
-                'upload_date': '20090627',
-                'duration': 1800,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
-            'info_dict': {
-                'id': '99205',
-                'ext': 'flv',
-                'title': 'Medicopter 117 - Angst!',
-                'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
-                'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
-                'upload_date': '20080928',
-                'duration': 2691,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
-            'info_dict': {
-                'id': '188729',
-                'ext': 'flv',
-                'upload_date': '20150204',
-                'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
-                'title': 'Der Bachelor - Folge 4',
-            }
-        }, {
-            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
-            'only_matching': True,
-        },
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_page_url = 'http://%s/' % mobj.group('domain')
-        video_id = mobj.group('video_id')
-
-        webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
-
-        mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
-        if mobj:
-            raise ExtractorError(clean_html(mobj.group(1)), expected=True)
-
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage, default=None)
-
-        upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
-
-        mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
-        duration = int(mobj.group('seconds')) if mobj else None
-
-        playerdata_url = self._html_search_regex(
-            r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
-
-        playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
-
-        videoinfo = playerdata.find('./playlist/videoinfo')
-
-        formats = []
-        for filename in videoinfo.findall('filename'):
-            mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
-            if mobj:
-                fmt = {
-                    'url': mobj.group('url'),
-                    'play_path': 'mp4:' + mobj.group('play_path'),
-                    'page_url': video_page_url,
-                    'player_url': video_page_url + 'includes/vodplayer.swf',
-                }
-            else:
-                mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
-                if mobj:
-                    fmt = {
-                        'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
-                        'play_path': 'mp4:' + mobj.group('play_path'),
-                        'page_url': url,
-                        'player_url': video_page_url + 'includes/vodplayer.swf',
-                    }
-                else:
-                    fmt = {
-                        'url': filename.text,
-                    }
-            fmt.update({
-                'width': int_or_none(filename.get('width')),
-                'height': int_or_none(filename.get('height')),
-                'vbr': int_or_none(filename.get('bitrate')),
-                'ext': 'flv',
-            })
-            formats.append(fmt)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'duration': duration,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py

new file mode 100644 (file)

index 0000000..4e22628
--- /dev/null
+++ b/youtube_dl/extractor/ruutu.py
@@ -0,0 +1,135 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    xpath_text,
+)
+
+
+class RuutuIE(InfoExtractor):
+    """Extract formats and metadata for ruutu.fi/ohjelmat/ pages.
+
+    The page supplies a numeric media id; the media XML downloaded for
+    that id lists the stream URLs, the runtime and the age limit.
+    """
+
+    _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
+            'md5': 'ab2093f39be1ca8581963451b3c0234f',
+            'info_dict': {
+                'id': '2058907',
+                'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
+                'ext': 'mp4',
+                'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
+                'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 114,
+                'age_limit': 0,
+            },
+        },
+        {
+            'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
+            'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
+            'info_dict': {
+                'id': '2057306',
+                'display_id': 'superpesis-katso-koko-kausi-ruudussa',
+                'ext': 'mp4',
+                'title': 'Superpesis: katso koko kausi Ruudussa',
+                'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 40,
+                'age_limit': 0,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video embedded in the page at *url*."""
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'data-media-id="(\d+)"', webpage, 'media id')
+
+        video_xml_url = None
+
+        # Prefer the XML URL template from the page's player config, if present.
+        media_data = self._search_regex(
+            r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
+            'media data', default=None)
+        if media_data:
+            media_json = self._parse_json(media_data, display_id, fatal=False)
+            if media_json:
+                xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
+                if xml_url:
+                    video_xml_url = xml_url.replace('{ID}', video_id)
+
+        # Otherwise fall back to the fixed media XML endpoint.
+        if not video_xml_url:
+            video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
+
+        video_xml = self._download_xml(video_xml_url, video_id)
+
+        formats = []
+        processed_urls = []
+
+        def extract_formats(node):
+            # Walk the tree: '*Files' nodes are containers that are recursed
+            # into, '*File' nodes carry one stream URL each.
+            for child in node:
+                if child.tag.endswith('Files'):
+                    extract_formats(child)
+                elif child.tag.endswith('File'):
+                    video_url = child.text
+                    if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
+                        # Skip only this entry; returning here would also
+                        # drop every remaining sibling of this node.
+                        continue
+                    processed_urls.append(video_url)
+                    ext = determine_ext(video_url)
+                    if ext == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(
+                            video_url, video_id, 'mp4', m3u8_id='hls'))
+                    elif ext == 'f4m':
+                        formats.extend(self._extract_f4m_formats(
+                            video_url, video_id, f4m_id='hds'))
+                    else:
+                        proto = compat_urllib_parse_urlparse(video_url).scheme
+                        # Plain URLs: keep only HTTP* nodes and rtmp links.
+                        if not child.tag.startswith('HTTP') and proto != 'rtmp':
+                            continue
+                        preference = -1 if proto == 'rtmp' else 1
+                        label = child.get('label')
+                        tbr = int_or_none(child.get('bitrate'))
+                        # 'resolution' is read as 'WxH'; partition() never fails
+                        # to unpack when the attribute or the 'x' is missing.
+                        width, height = (child.get('resolution') or '').partition('x')[::2]
+                        formats.append({
+                            'format_id': '%s-%s' % (proto, label if label else tbr),
+                            'url': video_url,
+                            'width': int_or_none(width or None),
+                            'height': int_or_none(height or None),
+                            'tbr': tbr,
+                            'preference': preference,
+                        })
+
+        extract_formats(video_xml.find('./Clip'))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
+            'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py

index 10251f29e033ef241618ed7985e214dc0e76cd51..f3c80708c86ab2fc29fbd029b245bbe894af2dfb 100644 (file)
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -83,7 +83,7 @@ class SafariIE(SafariBaseIE):
                                      library/view/[^/]+|
                                      api/v1/book
                                  )/
-                                (?P<course_id>\d+)/
+                                (?P<course_id>[^/]+)/
                                      (?:chapter(?:-content)?/)?
                                  (?P<part>part\d+)\.html
      '''
@@ -100,6 +100,10 @@ class SafariIE(SafariBaseIE):
      }, {
          'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
          'only_matching': True,
+    }, {
+        # non-digits in course id
+        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -122,7 +126,7 @@ class SafariCourseIE(SafariBaseIE):
      IE_NAME = 'safari:course'
      IE_DESC = 'safaribooksonline.com online courses'
  
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
  
      _TESTS = [{
          'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py

index d3b8a1be49702f71a1a8c4eb7bd01d17cf103071..9c53704ea383b1af34e8f8157e327b71c2c3865a 100644 (file)
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):
          ["arch", "", "http://ussenate-f.akamaihd.net/"]
      ]
      _IE_NAME = 'senate.gov'
-    _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)'
+    _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
      _TESTS = [{
          'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
          'info_dict': {
@@ -72,12 +72,16 @@ class SenateISVPIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'Integrated Senate Video Player'
          }
+    }, {
+        # From http://www.c-span.org/video/?96791-1
+        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+        'only_matching': True,
      }]
  
      @staticmethod
      def _search_iframe_url(webpage):
          mobj = re.search(
-            r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]",
+            r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
              webpage)
          if mobj:
              return mobj.group('url')
diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py

deleted file mode 100644 (file)

index b5fa6f1..0000000
--- a/youtube_dl/extractor/sockshare.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import (
-    determine_ext,
-    ExtractorError,
-)
-
-from .common import InfoExtractor
-
-
-class SockshareIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
-    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
-    _TEST = {
-        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
-        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
-        'info_dict': {
-            'id': '437BE28B89D799D7',
-            'title': 'big_buck_bunny_720p_surround.avi',
-            'ext': 'avi',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url = 'http://sockshare.com/file/%s' % video_id
-        webpage = self._download_webpage(url, video_id)
-
-        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id,
-                                 expected=True)
-
-        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
-            type="hidden"\s+
-            value="([^"]*)"\s+
-            name="hash"
-            ''', webpage, 'hash')
-
-        fields = {
-            "hash": confirm_hash.encode('utf-8'),
-            "confirm": "Continue as Free User"
-        }
-
-        post = compat_urllib_parse.urlencode(fields)
-        req = compat_urllib_request.Request(url, post)
-        # Apparently, this header is required for confirmation to work.
-        req.add_header('Host', 'www.sockshare.com')
-        req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
-        webpage = self._download_webpage(
-            req, video_id, 'Downloading video page')
-
-        video_url = self._html_search_regex(
-            r'<a href="([^"]*)".+class="download_file_link"',
-            webpage, 'file url')
-        video_url = "http://www.sockshare.com" + video_url
-        title = self._html_search_regex((
-            r'<h1>(.+)<strong>',
-            r'var name = "([^"]+)";'),
-            webpage, 'title', default=None)
-        thumbnail = self._html_search_regex(
-            r'<img\s+src="([^"]*)".+?name="bg"',
-            webpage, 'thumbnail', default=None)
-
-        formats = [{
-            'format_id': 'sd',
-            'url': video_url,
-            'ext': determine_ext(title),
-        }]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py

new file mode 100644 (file)

index 0000000..5da66ca
--- /dev/null
+++ b/youtube_dl/extractor/soompi.py
@@ -0,0 +1,177 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .crunchyroll import CrunchyrollIE
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    remove_start,
+    xpath_text,
+)
+
+
+class SoompiBaseIE(InfoExtractor):
+    """Shared episode-list parsing for the Soompi TV extractors."""
+
+    def _get_episodes(self, webpage, episode_filter=None):
+        """Return the entries of the page's VIDEOS array that pass episode_filter.
+
+        With episode_filter=None, filter() keeps every truthy entry.
+        """
+        episodes = self._parse_json(
+            self._search_regex(
+                r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
+            None)
+        return list(filter(episode_filter, episodes))
+
+
+class SoompiIE(SoompiBaseIE, CrunchyrollIE):
+    """Extractor for single tv.soompi.com episode pages."""
+
+    IE_NAME = 'soompi'
+    _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://tv.soompi.com/en/watch/29235',
+        'info_dict': {
+            'id': '29235',
+            'ext': 'mp4',
+            'title': 'Episode 1096',
+            'description': '2015-05-20'
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _get_episode(self, webpage, video_id):
+        """Return the first VIDEOS entry whose 'id' equals video_id."""
+        return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
+
+    def _get_subtitles(self, config, video_id):
+        """Decrypt the subtitles embedded in a config XML document.
+
+        Returns a dict keyed by each subtitle's 'title' attribute; the dict is
+        empty when config is None.
+        """
+        # _real_extract leaves config as None when the page lists no quality
+        if config is None:
+            return {}
+
+        sub_langs = {}
+        for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
+            sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
+
+        subtitles = {}
+        for s in config.findall('./{default}preload/subtitle'):
+            sub_id = s.get('id')
+            lang_code = sub_langs.get(sub_id)
+            if not lang_code:
+                continue
+            data = xpath_text(s, './data', 'data')
+            iv = xpath_text(s, './iv', 'iv')
+            # Test sub_id here: the builtin id is always truthy
+            if not sub_id or not iv or not data:
+                continue
+            # _decrypt_subtitles/_extract_subtitles are not defined in this
+            # file; presumably inherited from CrunchyrollIE
+            subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
+            subtitles[lang_code] = self._extract_subtitles(subtitle)
+        return subtitles
+
+    def _real_extract(self, url):
+        """Collect HLS formats, episode metadata and subtitles for one episode.
+
+        Raises ExtractorError with the site's block message on an HTTP 403
+        response that carries one.
+        """
+        video_id = self._match_id(url)
+
+        try:
+            webpage = self._download_webpage(
+                url, video_id, 'Downloading episode page')
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                # read() returns bytes; decode before matching a text pattern
+                webpage = ee.cause.read().decode('utf-8', 'replace')
+                block_message = self._html_search_regex(
+                    r'(?s)<div class="block-message">(.+?)</div>', webpage,
+                    'block message', default=None)
+                if block_message:
+                    raise ExtractorError(block_message, expected=True)
+            raise
+
+        formats = []
+        # Kept after the loop: subtitles are read from the last config fetched
+        config = None
+        for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
+            config = self._download_xml(
+                'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
+                video_id, 'Downloading %s XML' % format_id)
+            m3u8_url = xpath_text(
+                config, './{default}preload/stream_info/file',
+                '%s m3u8 URL' % format_id)
+            if not m3u8_url:
+                continue
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', m3u8_id=format_id))
+        self._sort_formats(formats)
+
+        episode = self._get_episode(webpage, video_id)
+
+        title = episode['name']
+        description = episode.get('description')
+        duration = int_or_none(episode.get('duration'))
+
+        thumbnails = [{
+            'id': thumbnail_id,
+            'url': thumbnail_url,
+        } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
+
+        subtitles = self.extract_subtitles(config, video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles
+        }
+
+
+class SoompiShowIE(SoompiBaseIE):
+    """Playlist extractor for tv.soompi.com show pages."""
+
+    IE_NAME = 'soompi:show'
+    _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
+    _TESTS = [{
+        'url': 'http://tv.soompi.com/en/shows/liar-game',
+        'info_dict': {
+            'id': 'liar-game',
+            'title': 'Liar Game',
+            'description': 'md5:52c02bce0c1a622a95823591d0589b66',
+        },
+        'playlist_count': 14,
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist with one 'Soompi' url_result per listed episode."""
+        show_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            url, show_id, 'Downloading show page')
+
+        title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
+        description = self._og_search_description(webpage)
+
+        entries = [
+            self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
+            for episode in self._get_episodes(webpage)]
+
+        return self.playlist_result(entries, show_id, title, description)
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py

index b936202f6f3005fe9ae085724566d709c6a484cc..bff75d6b2945584e0193b50ff8915b91fec26f1f 100644 (file)
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor):
              'description': 'Crazy Bitch X rated music video.',
              'uploader': 'oreusz',
              'uploader_id': '124697',
-            'upload_date': '20070508',
+            'upload_date': '20070507',
              'age_limit': 18,
          }
      }
@@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor):
          title = self._html_search_regex(
              r'<h1>([^<]+)', webpage, 'title')
          description = self._html_search_regex(
-            r'<div\s+id="descriptionContent">([^<]+)<',
+            r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
              webpage, 'description', fatal=False)
          thumbnail = self._html_search_regex(
              r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
@@ -64,14 +64,14 @@ class SpankwireIE(InfoExtractor):
              r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
              webpage, 'view count', fatal=False))
          comment_count = str_to_int(self._html_search_regex(
-            r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
+            r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
              webpage, 'comment count', fatal=False))
  
          video_urls = list(map(
              compat_urllib_parse.unquote,
-            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
+            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
          if webpage.find('flashvars\.encrypted = "true"') != -1:
-            password = self._html_search_regex(
+            password = self._search_regex(
                  r'flashvars\.video_title = "([^"]+)',
                  webpage, 'password').replace('+', ' ')
              video_urls = list(map(
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py

index 98cf92d89a1151edfd11b8f15a86eeaa6a83178d..08a5c4314959409cd93d71fcd8f8160ee541b7c4 100644 (file)
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -2,7 +2,11 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
-from ..utils import float_or_none
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+    determine_ext,
+    float_or_none,
+)
  
  
  class SpiegeltvIE(InfoExtractor):
@@ -17,7 +21,7 @@ class SpiegeltvIE(InfoExtractor):
              'thumbnail': 're:http://.*\.jpg$',
          },
          'params': {
-            # rtmp download
+            # m3u8 download
              'skip_download': True,
          }
      }, {
@@ -51,9 +55,37 @@ class SpiegeltvIE(InfoExtractor):
          is_wide = media_json['is_wide']
  
          server_json = self._download_json(
-            'http://www.spiegel.tv/streaming_servers/', video_id,
-            note='Downloading server information')
-        server = server_json[0]['endpoint']
+            'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
+            video_id, note='Downloading server information')
+
+        format = '16x9' if is_wide else '4x3'
+
+        formats = []
+        for streamingserver in server_json['streamingserver']:
+            endpoint = streamingserver.get('endpoint')
+            if not endpoint:
+                continue
+            play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
+            if endpoint.startswith('rtmp'):
+                formats.append({
+                    'url': endpoint,
+                    'format_id': 'rtmp',
+                    'app': compat_urllib_parse_urlparse(endpoint).path[1:],
+                    'play_path': play_path,
+                    'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
+                    'ext': 'flv',
+                    'rtmp_live': True,
+                })
+            elif determine_ext(endpoint) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    endpoint.replace('[video]', play_path),
+                    video_id, 'm4v',
+                    preference=1,  # Prefer hls since it allows to workaround georestriction
+                    m3u8_id='hls'))
+            else:
+                formats.append({
+                    'url': endpoint,
+                })
  
          thumbnails = []
          for image in media_json['images']:
@@ -65,16 +97,12 @@ class SpiegeltvIE(InfoExtractor):
  
          description = media_json['subtitle']
          duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
-        format = '16x9' if is_wide else '4x3'
-
-        url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
  
          return {
              'id': video_id,
              'title': title,
-            'url': url,
-            'ext': 'm4v',
              'description': description,
              'duration': duration,
-            'thumbnails': thumbnails
+            'thumbnails': thumbnails,
+            'formats': formats,
          }
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py

index 854d01beeb5cefd1f82d7991ee2c0ce75ad33dfa..e527aa97188b1860e054f8af7c7bd7a33301729e 100644 (file)
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -44,7 +44,7 @@ class SunPornoIE(InfoExtractor):
              webpage, 'duration', fatal=False))
  
          view_count = int_or_none(self._html_search_regex(
-            r'class="views">\s*(\d+)\s*<',
+            r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
              webpage, 'view count', fatal=False))
          comment_count = int_or_none(self._html_search_regex(
              r'(\d+)</b> Comments?',
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index 56be526383b590d9b00759400a269d5164beef2f..d1b7264b4ca4a0cb72e491da26d7f5bbc1cc66b7 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -10,6 +10,7 @@ from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
      qualities,
+    determine_ext,
  )
  from ..compat import compat_ord
  
@@ -50,6 +51,17 @@ class TeamcocoIE(InfoExtractor):
              'params': {
                  'skip_download': True,  # m3u8 downloads
              }
+        }, {
+            'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+            'info_dict': {
+                'id': '89341',
+                'ext': 'mp4',
+                'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+                'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
          }
      ]
      _VIDEO_ID_REGEXES = (
@@ -108,10 +120,24 @@ class TeamcocoIE(InfoExtractor):
          formats = []
          get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
          for filed in data['files']:
-            if filed['type'] == 'hls':
-                formats.extend(self._extract_m3u8_formats(
-                    filed['url'], video_id, ext='mp4'))
+            if determine_ext(filed['url']) == 'm3u8':
+                # compat_urllib_parse.urljoin does not work here
+                if filed['url'].startswith('/'):
+                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
+                else:
+                    m3u8_url = filed['url']
+                m3u8_formats = self._extract_m3u8_formats(
+                    m3u8_url, video_id, ext='mp4')
+                for m3u8_format in m3u8_formats:
+                    if m3u8_format not in formats:
+                        formats.append(m3u8_format)
+            elif determine_ext(filed['url']) == 'f4m':
+                # TODO Correct f4m extraction
+                continue
              else:
+                if filed['url'].startswith('/mp4:protected/'):
+                    # TODO Correct extraction for these files
+                    continue
                  m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
                  if m_format is not None:
                      format_id = m_format.group(1)
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py

index 025d0877cb928bb433aff9f6eff19a29d253e006..3a68eaa80ea6867e6806a4f242a8afc910b8ba06 100644 (file)
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -6,8 +6,8 @@ from .common import InfoExtractor
  
  class TF1IE(InfoExtractor):
      """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
-    _TESTS = {
+    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+    _TESTS = [{
          'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
          'info_dict': {
              'id': '10635995',
@@ -32,7 +32,13 @@ class TF1IE(InfoExtractor):
              # Sometimes wat serves the whole file with the --test option
              'skip_download': True,
          },
-    }
+    }, {
+        'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index 92731ad3d7e8dcc3167b50ce1a15e3b035fb7721..83d833e30dbeb60caa43aa272bfd4d35f4507a53 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -26,7 +26,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
  class ThePlatformIE(InfoExtractor):
      _VALID_URL = r'''(?x)
          (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
-           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
+           (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
           |theplatform:)(?P<id>[^/\?&]+)'''
  
      _TESTS = [{
@@ -56,6 +56,17 @@ class ThePlatformIE(InfoExtractor):
              # rtmp download
              'skip_download': True,
          }
+    }, {
+        'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
+        'info_dict': {
+            'id': 'yMBg9E8KFxZD',
+            'ext': 'mp4',
+            'description': 'md5:644ad9188d655b742f942bf2e06b002d',
+            'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
+        }
+    }, {
+        'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
+        'only_matching': True,
      }]
  
      @staticmethod
@@ -85,6 +96,11 @@ class ThePlatformIE(InfoExtractor):
          if not provider_id:
              provider_id = 'dJ5BDC'
  
+        path = provider_id
+        if mobj.group('media'):
+            path += '/media'
+        path += '/' + video_id
+
          if smuggled_data.get('force_smil_url', False):
              smil_url = url
          elif mobj.group('config'):
@@ -94,8 +110,7 @@ class ThePlatformIE(InfoExtractor):
              config = self._download_json(config_url, video_id, 'Downloading config')
              smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
          else:
-            smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
-                        'format=smil&mbr=true'.format(provider_id, video_id))
+            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
  
          sig = smuggled_data.get('sig')
          if sig:
@@ -112,7 +127,7 @@ class ThePlatformIE(InfoExtractor):
          else:
              raise ExtractorError(error_msg, expected=True)
  
-        info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
+        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
          info_json = self._download_webpage(info_url, video_id)
          info = json.loads(info_json)
  
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py

index 9f9e388c50948d658d1022f8514122643b623a03..13263614cc06b099d929ee71564899ac3620f76a 100644 (file)
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -12,17 +12,22 @@ class TlcIE(DiscoveryIE):
      IE_NAME = 'tlc.com'
      _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
  
-    _TEST = {
+    # DiscoveryIE has _TESTS
+    _TESTS = [{
          'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
-        'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
          'info_dict': {
-            'id': '853232',
+            'id': '104493',
              'ext': 'mp4',
-            'title': 'Cake Boss: Too Big to Fly',
+            'title': 'Too Big to Fly',
              'description': 'Buddy has taken on a high flying task.',
              'duration': 119,
+            'timestamp': 1393365060,
+            'upload_date': '20140225',
          },
-    }
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
+    }]
  
  
  class TlcDeIE(InfoExtractor):
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py

index 59af9aba06399cefcc6c2049c958dfb3819bb20a..c282865b2517d8cbd62df6f2dee0540146baae48 100644 (file)
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -33,7 +33,7 @@ class TNAFlixIE(InfoExtractor):
          },
          {
              'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
-            'matching_only': True,
+            'only_matching': True,
          }
      ]
  
@@ -51,9 +51,8 @@ class TNAFlixIE(InfoExtractor):
  
          age_limit = self._rta_search(webpage)
  
-        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
-        if duration:
-            duration = parse_duration(duration[1:])
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', default=None))
  
          cfg_url = self._proto_relative_url(self._html_search_regex(
              self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
@@ -62,14 +61,15 @@ class TNAFlixIE(InfoExtractor):
              cfg_url, display_id, note='Downloading metadata',
              transform_source=fix_xml_ampersands)
  
-        thumbnail = cfg_xml.find('./startThumb').text
+        thumbnail = self._proto_relative_url(
+            cfg_xml.find('./startThumb').text, 'http:')
  
          formats = []
          for item in cfg_xml.findall('./quality/item'):
              video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
              format_id = item.find('res').text
              fmt = {
-                'url': video_url,
+                'url': self._proto_relative_url(video_url, 'http:'),
                  'format_id': format_id,
              }
              m = re.search(r'^(\d+)', format_id)
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py

index d73ad3762a1b455cfd4bc384c27e2dd85e776dde..c9cb69333f7da0a9f4fe009e79b06433bca83726 100644 (file)
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor):
          webpage = self._download_webpage(req, display_id)
  
          flashvars = json.loads(self._html_search_regex(
-            r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
+            r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'))
  
          video_url = flashvars['video_url']
          if flashvars.get('encrypted') is True:
@@ -58,19 +58,19 @@ class Tube8IE(InfoExtractor):
          thumbnail = flashvars.get('image_url')
  
          title = self._html_search_regex(
-            r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
+            r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
          description = self._html_search_regex(
-            r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
+            r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
          uploader = self._html_search_regex(
-            r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
+            r'<span class="username">\s*(.+?)\s*<',
              webpage, 'uploader', fatal=False)
  
          like_count = int_or_none(self._html_search_regex(
-            r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
+            r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
          dislike_count = int_or_none(self._html_search_regex(
-            r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
+            r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
          view_count = self._html_search_regex(
-            r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
+            r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False)
          if view_count:
              view_count = str_to_int(view_count)
          comment_count = self._html_search_regex(
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py

new file mode 100644 (file)

index 0000000..2c4b218
--- /dev/null
+++ b/youtube_dl/extractor/tubitv.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import codecs
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class TubiTvIE(InfoExtractor):
+    """Extractor for tubitv.com video pages, with optional account login."""
+
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
+    _LOGIN_URL = 'http://tubitv.com/login'
+    _NETRC_MACHINE = 'tubitv'
+    _TEST = {
+        'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
+        'info_dict': {
+            'id': '54411',
+            'ext': 'mp4',
+            'title': 'The Kitchen Musical - EP01',
+            'thumbnail': 're:^https?://.*\.png$',
+            'description': 'md5:37532716166069b353e8866e71fefae7',
+            'duration': 2407,
+        },
+        'params': {
+            'skip_download': 'HLS download',
+        },
+    }
+
+    def _login(self):
+        """Post the configured credentials to _LOGIN_URL.
+
+        Does nothing when no username is configured. Raises ExtractorError
+        when the response page lacks the logout element.
+        """
+        username, password = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        payload = compat_urllib_parse.urlencode({
+            'username': username,
+            'password': password,
+        }).encode('utf-8')
+        login_request = compat_urllib_request.Request(self._LOGIN_URL, payload)
+        login_request.add_header(
+            'Content-Type', 'application/x-www-form-urlencoded')
+        login_page = self._download_webpage(
+            login_request, None, False, 'Wrong login info')
+        if re.search(r'id="tubi-logout"', login_page) is None:
+            raise ExtractorError(
+                'Login failed (invalid username/password)', expected=True)
+
+    def _real_initialize(self):
+        """Run the login step."""
+        self._login()
+
+    def _real_extract(self, url):
+        """Return the info dict for a video page.
+
+        Raises ExtractorError when the page shows the login-required screen.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        login_wall = re.search(
+            r"<(?:DIV|div) class='login-required-screen'>", webpage)
+        if login_wall is not None:
+            raise ExtractorError(
+                'This video requires login, use --username and --password '
+                'options to provide account credentials.', expected=True)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        duration = int_or_none(self._html_search_meta(
+            'video:duration', webpage, 'duration'))
+
+        # The m3u8 URL is recovered by ROT13-decoding 'apu' and reversing it
+        encoded_url = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
+        m3u8_url = codecs.decode(encoded_url, 'rot_13')[::-1]
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+        }
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py

index 828c808a6456b6b99b134cb7ae9d9017de9ad3aa..63c20310d8e54f98f2c19bd23f9ebca947ae23d3 100644 (file)
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from .pornhub import PornHubIE
  
  
  class TumblrIE(InfoExtractor):
@@ -28,6 +29,17 @@ class TumblrIE(InfoExtractor):
              'description': 'md5:dba62ac8639482759c8eb10ce474586a',
              'thumbnail': 're:http://.*\.jpg',
          }
+    }, {
+        'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
+        'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
+        'info_dict': {
+            'id': 'Wmur',
+            'ext': 'mp4',
+            'title': 'naked smoking & stretching',
+            'upload_date': '20150506',
+            'timestamp': 1430931613,
+        },
+        'add_ie': ['Vidme'],
      }]
  
      def _real_extract(self, url):
@@ -38,6 +50,16 @@ class TumblrIE(InfoExtractor):
          url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
          webpage = self._download_webpage(url, video_id)
  
+        vid_me_embed_url = self._search_regex(
+            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+            webpage, 'vid.me embed', default=None)
+        if vid_me_embed_url is not None:
+            return self.url_result(vid_me_embed_url, 'Vidme')
+
+        pornhub_url = PornHubIE._extract_url(webpage)
+        if pornhub_url:
+            return self.url_result(pornhub_url, 'PornHub')
+
          iframe_url = self._search_regex(
              r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
              webpage, 'iframe url')
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py

index 29703a8a9a6ddf0981642c28cd2f1f68cc07c7b7..7ae63a4992a74368ec8b5f6a266a298cb6776b79 100644 (file)
--- a/youtube_dl/extractor/turbo.py
+++ b/youtube_dl/extractor/turbo.py
@@ -23,7 +23,7 @@ class TurboIE(InfoExtractor):
              'ext': 'mp4',
              'duration': 3715,
              'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
-            'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+            'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
              'thumbnail': 're:^https?://.*\.jpg$',
          }
      }
@@ -42,7 +42,7 @@ class TurboIE(InfoExtractor):
          title = xpath_text(item, './title', 'title')
          duration = int_or_none(xpath_text(item, './durate', 'duration'))
          thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
-        description = self._og_search_description(webpage)
+        description = self._html_search_meta('description', webpage)
  
          formats = []
          get_quality = qualities(['3g', 'sd', 'hq'])
diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py

new file mode 100644 (file)

index 0000000..3a4f393
--- /dev/null
+++ b/youtube_dl/extractor/tvc.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+)
+
+
+class TVCIE(InfoExtractor):
+    """Extractor for tvc.ru embedded (iframe) video players."""
+
+    _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
+        'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
+        'info_dict': {
+            'id': '74622',
+            'ext': 'mp4',
+            'title': 'События. "События". Эфир от 22.05.2015 14:30',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 1122,
+        },
+    }
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return the src of the first tvc.ru video iframe found, or None."""
+        # (?:(?!\1).)+ stops at the attribute's own closing quote, so a
+        # single-quoted src value is not overrun up to a later quote.
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/(?:(?!\1).)+)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        """Fetch the video JSON by id and map its qualities to formats."""
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'http://www.tvc.ru/video/json/id/%s' % video_id, video_id)
+
+        formats = []
+        for info in video.get('path', {}).get('quality', []):
+            video_url = info.get('url')
+            if not video_url:
+                continue
+            format_id = self._search_regex(
+                r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
+                'format id', default=None)
+            formats.append({
+                'url': video_url,
+                'format_id': format_id,
+                'width': int_or_none(info.get('width')),
+                'height': int_or_none(info.get('height')),
+                'tbr': int_or_none(info.get('bitrate')),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'thumbnail': video.get('picture'),
+            'duration': int_or_none(video.get('duration')),
+            'formats': formats,
+        }
+
+
+class TVCArticleIE(InfoExtractor):
+    """Extractor for other tvc.ru pages; delegates the video itself to TVCIE."""
+
+    _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+    _TESTS = [{
+        'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
+        'info_dict': {
+            'id': '74622',
+            'ext': 'mp4',
+            'title': 'События. "События". Эфир от 22.05.2015 14:30',
+            'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 1122,
+        },
+    }, {
+        'url': 'http://www.tvc.ru/news/show/id/69944',
+        'info_dict': {
+            'id': '75399',
+            'ext': 'mp4',
+            'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
+            'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 278,
+        },
+    }, {
+        'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
+        'info_dict': {
+            'id': '2185',
+            'ext': 'mp4',
+            'title': 'Ещё не поздно. Эфир от 03.08.2013',
+            'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 3316,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_transparent result that hands the page's video URL to TVC."""
+        webpage = self._download_webpage(url, self._match_id(url))
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'TVC',
+            'url': self._og_search_video_url(webpage),
+            'title': clean_html(self._og_search_title(webpage)),
+            'description': clean_html(self._og_search_description(webpage)),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py

index 102362b295450f58ff085ec9be7d21921a1ac494..dc3a8334a6b335143dff417d805a26df412d8783 100644 (file)
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -5,7 +5,9 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      float_or_none,
+    int_or_none,
      parse_age_limit,
  )
  
@@ -24,22 +26,24 @@ class TvigleIE(InfoExtractor):
                  'display_id': 'sokrat',
                  'ext': 'flv',
                  'title': 'Сократ',
-                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
                  'duration': 6586,
-                'age_limit': 0,
+                'age_limit': 12,
              },
+            'skip': 'georestricted',
          },
          {
              'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
-            'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
+            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
              'info_dict': {
                  'id': '5142516',
-                'ext': 'mp4',
+                'ext': 'flv',
                  'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                  'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                  'duration': 186.080,
                  'age_limit': 0,
              },
+            'skip': 'georestricted',
          }, {
              'url': 'https://cloud.tvigle.ru/video/5267604/',
              'only_matching': True,
@@ -54,7 +58,7 @@ class TvigleIE(InfoExtractor):
          if not video_id:
              webpage = self._download_webpage(url, display_id)
              video_id = self._html_search_regex(
-                r'<li class="video-preview current_playing" id="(\d+)">',
+                r'class="video-preview current_playing" id="(\d+)">',
                  webpage, 'video id')
  
          video_data = self._download_json(
@@ -62,21 +66,34 @@ class TvigleIE(InfoExtractor):
  
          item = video_data['playlist']['items'][0]
  
+        videos = item.get('videos')
+
+        error_message = item.get('errorMessage')
+        if not videos and error_message:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
          title = item['title']
-        description = item['description']
-        thumbnail = item['thumbnail']
+        description = item.get('description')
+        thumbnail = item.get('thumbnail')
          duration = float_or_none(item.get('durationMilliseconds'), 1000)
          age_limit = parse_age_limit(item.get('ageRestrictions'))
  
          formats = []
          for vcodec, fmts in item['videos'].items():
-            for quality, video_url in fmts.items():
+            for format_id, video_url in fmts.items():
+                if format_id == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=vcodec))
+                    continue
+                height = self._search_regex(
+                    r'^(\d+)[pP]$', format_id, 'height', default=None)
                  formats.append({
                      'url': video_url,
-                    'format_id': '%s-%s' % (vcodec, quality),
+                    'format_id': '%s-%s' % (vcodec, format_id),
                      'vcodec': vcodec,
-                    'height': int(quality[:-1]),
-                    'filesize': item['video_files_size'][vcodec][quality],
+                    'height': int_or_none(height),
+                    'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
                  })
          self._sort_formats(formats)
  
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py

index e83e31a31640fa32e4a19a48a745d279a14d3753..79863e781fd41101c76659ab3b43a85433d25665 100644 (file)
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -26,6 +26,7 @@ class TVPlayIE(InfoExtractor):
             viasat4play\.no/programmer|
             tv6play\.no/programmer|
             tv3play\.dk/programmer|
+           play\.novatv\.bg/programi
          )/[^/]+/(?P<id>\d+)
          '''
      _TESTS = [
@@ -173,6 +174,22 @@ class TVPlayIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        {
+            'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
+            'info_dict': {
+                'id': '624952',
+                'ext': 'flv',
+                'title': 'Здравей, България (12.06.2015 г.) ',
+                'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
+                'duration': 8838,
+                'timestamp': 1434100372,
+                'upload_date': '20150612',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
      ]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py

index 67e8bfea03476ccf78d2470a973655f2a7213730..c1ee1decc433627ffa52196d44f7563b46d309cc 100644 (file)
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -15,7 +15,7 @@ class TwentyFourVideoIE(InfoExtractor):
      _TESTS = [
          {
              'url': 'http://www.24video.net/video/view/1044982',
-            'md5': '48dd7646775690a80447a8dca6a2df76',
+            'md5': 'd041af8b5b4246ea466226a0d6693345',
              'info_dict': {
                  'id': '1044982',
                  'ext': 'mp4',
@@ -54,7 +54,7 @@ class TwentyFourVideoIE(InfoExtractor):
              webpage, 'upload date'))
  
          uploader = self._html_search_regex(
-            r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
+            r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
              webpage, 'uploader', fatal=False)
  
          view_count = int_or_none(self._html_search_regex(
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py

index dd026748dcbb536f9f49181b0d211bf0a9157777..722eb52368825b92c88506ff33d79bf1f2f91a32 100644 (file)
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
  from ..compat import (
      compat_urllib_parse,
      compat_urllib_request,
+    compat_urlparse,
  )
  from ..utils import (
      ExtractorError,
@@ -26,11 +27,21 @@ class Vbox7IE(InfoExtractor):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        new_location = self._search_regex(r'window\.location = \'(.*)\';',
-                                          redirect_page, 'redirect location')
-        redirect_url = urlh.geturl() + new_location
-        webpage = self._download_webpage(redirect_url, video_id,
+        # need to get the page 3 times for the correct jsSecretToken cookie
+        # which is necessary for the correct title
+        def get_session_id():
+            redirect_page = self._download_webpage(url, video_id)
+            session_id_url = self._search_regex(
+                r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
+                'session id url')
+            self._download_webpage(
+                compat_urlparse.urljoin(url, session_id_url), video_id,
+                'Getting session id')
+
+        get_session_id()
+        get_session_id()
+
+        webpage = self._download_webpage(url, video_id,
                                           'Downloading redirect page')
  
          title = self._html_search_regex(r'<title>(.*)</title>',
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py

index e6ee1e4715efc5d47dd3f9aa32d6559a5737a8ea..f38a72fde8974a7a1ea290de04281f67079b1a16 100644 (file)
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+)
  
  
  class VGTVIE(InfoExtractor):
@@ -59,16 +62,16 @@ class VGTVIE(InfoExtractor):
          },
          {
              # streamType: live
-            'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
+            'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
              'info_dict': {
-                'id': '100015',
+                'id': '113063',
                  'ext': 'flv',
-                'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
-                'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
+                'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'description': 'md5:b3743425765355855f88e096acc93231',
                  'thumbnail': 're:^https?://.*\.jpg',
                  'duration': 0,
-                'timestamp': 1407423348,
-                'upload_date': '20140807',
+                'timestamp': 1432975582,
+                'upload_date': '20150530',
                  'view_count': int,
              },
              'params': {
@@ -97,7 +100,12 @@ class VGTVIE(InfoExtractor):
              % (host, video_id, HOST_WEBSITES[host]),
              video_id, 'Downloading media JSON')
  
+        if data.get('status') == 'inactive':
+            raise ExtractorError(
+                'Video %s is no longer available' % video_id, expected=True)
+
          streams = data['streamUrls']
+        stream_type = data.get('streamType')
  
          formats = []
  
@@ -107,7 +115,8 @@ class VGTVIE(InfoExtractor):
                  hls_url, video_id, 'mp4', m3u8_id='hls'))
  
          hds_url = streams.get('hds')
-        if hds_url:
+        # wasLive hds are always 404
+        if hds_url and stream_type != 'wasLive':
              formats.extend(self._extract_f4m_formats(
                  hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                  video_id, f4m_id='hds'))
@@ -135,13 +144,14 @@ class VGTVIE(InfoExtractor):
  
          return {
              'id': video_id,
-            'title': data['title'],
+            'title': self._live_title(data['title']),
              'description': data['description'],
              'thumbnail': data['images']['main'] + '?t[]=900x506q80',
              'timestamp': data['published'],
              'duration': float_or_none(data['duration'], 1000),
              'view_count': data['displays'],
              'formats': formats,
+            'is_live': True if stream_type == 'live' else False,
          }
  
  
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py

index bd953fb4cc212f50dce2cac624c9391a14e82898..e0b55078b2c9af8bf654ee6cd6982305074cb39b 100644 (file)
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -10,7 +10,7 @@ from ..utils import (
  
  class VidmeIE(InfoExtractor):
      _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
-    _TEST = {
+    _TESTS = [{
          'url': 'https://vid.me/QNB',
          'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
          'info_dict': {
@@ -23,9 +23,14 @@ class VidmeIE(InfoExtractor):
              'upload_date': '20140725',
              'thumbnail': 're:^https?://.*\.jpg',
          },
-    }
+    }, {
+        # From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
+        'url': 'https://vid.me/e/Wmur',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
+        url = url.replace('vid.me/e/', 'vid.me/')
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

index 7f2fb1ca8896e29e48a41a9efddaded987ba1e96..52d10d2421231ab25e09ac635d5efc3477fa92e4 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,5 +1,7 @@
+# coding: utf-8
  from __future__ import unicode_literals
  
+import json
  import time
  import hmac
  import hashlib
@@ -11,6 +13,7 @@ from ..utils import (
      parse_age_limit,
      parse_iso8601,
  )
+from ..compat import compat_urllib_request
  from .common import InfoExtractor
  
  
@@ -23,7 +26,9 @@ class VikiBaseIE(InfoExtractor):
      _APP_VERSION = '2.2.5.1428709186'
      _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
  
-    def _prepare_call(self, path, timestamp=None):
+    _NETRC_MACHINE = 'viki'
+
+    def _prepare_call(self, path, timestamp=None, post_data=None):
          path += '?' if '?' not in path else '&'
          if not timestamp:
              timestamp = int(time.time())
@@ -33,17 +38,19 @@ class VikiBaseIE(InfoExtractor):
              query.encode('ascii'),
              hashlib.sha1
          ).hexdigest()
-        return self._API_URL_TEMPLATE % (query, sig)
+        url = self._API_URL_TEMPLATE % (query, sig)
+        return compat_urllib_request.Request(
+            url, json.dumps(post_data).encode('utf-8')) if post_data else url
  
-    def _call_api(self, path, video_id, note, timestamp=None):
+    def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
          resp = self._download_json(
-            self._prepare_call(path, timestamp), video_id, note)
+            self._prepare_call(path, timestamp, post_data), video_id, note)
  
          error = resp.get('error')
          if error:
              if error == 'invalid timestamp':
                  resp = self._download_json(
-                    self._prepare_call(path, int(resp['current_timestamp'])),
+                    self._prepare_call(path, int(resp['current_timestamp']), post_data),
                      video_id, '%s (retry)' % note)
                  error = resp.get('error')
              if error:
@@ -56,6 +63,23 @@ class VikiBaseIE(InfoExtractor):
              '%s returned error: %s' % (self.IE_NAME, error),
              expected=True)
  
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'login_id': username,
+            'password': password,
+        }
+
+        self._call_api(
+            'sessions.json', None,
+            'Logging in as %s' % username, post_data=login_form)
+
  
  class VikiIE(VikiBaseIE):
      IE_NAME = 'viki'
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index cc384adbf9837f35f90c64d0e8dc0396b0b601ec..38ff3c1a949c0511d08518b77180be653defce62 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -13,6 +13,7 @@ from ..compat import (
  from ..utils import (
      ExtractorError,
      orderedSet,
+    str_to_int,
      unescapeHTML,
      unified_strdate,
  )
@@ -34,6 +35,7 @@ class VKIE(InfoExtractor):
                  'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                  'duration': 195,
                  'upload_date': '20120212',
+                'view_count': int,
              },
          },
          {
@@ -45,7 +47,8 @@ class VKIE(InfoExtractor):
                  'uploader': 'Tom Cruise',
                  'title': 'No name',
                  'duration': 9,
-                'upload_date': '20130721'
+                'upload_date': '20130721',
+                'view_count': int,
              }
          },
          {
@@ -59,6 +62,7 @@ class VKIE(InfoExtractor):
                  'title': 'Lin Dan',
                  'duration': 101,
                  'upload_date': '20120730',
+                'view_count': int,
              }
          },
          {
@@ -73,7 +77,8 @@ class VKIE(InfoExtractor):
                  'uploader': 'Триллеры',
                  'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                  'duration': 8352,
-                'upload_date': '20121218'
+                'upload_date': '20121218',
+                'view_count': int,
              },
              'skip': 'Requires vk account credentials',
          },
@@ -100,6 +105,7 @@ class VKIE(InfoExtractor):
                  'title': 'Книга Илая',
                  'duration': 6771,
                  'upload_date': '20140626',
+                'view_count': int,
              },
              'skip': 'Only works from Russia',
          },
@@ -119,8 +125,8 @@ class VKIE(InfoExtractor):
              'act': 'login',
              'role': 'al_frame',
              'expire': '1',
-            'email': username,
-            'pass': password,
+            'email': username.encode('cp1251'),
+            'pass': password.encode('cp1251'),
          }
  
          request = compat_urllib_request.Request('https://login.vk.com/?act=login',
@@ -175,25 +181,29 @@ class VKIE(InfoExtractor):
                  m_rutube.group(1).replace('\\', ''))
              return self.url_result(rutube_url)
  
-        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
+        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
          if m_opts:
-            m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
+            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
              if m_opts_url:
                  opts_url = m_opts_url.group(1)
                  if opts_url.startswith('//'):
                      opts_url = 'http:' + opts_url
                  return self.url_result(opts_url)
  
-        data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
+        data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
          data = json.loads(data_json)
  
          # Extract upload date
          upload_date = None
-        mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+        mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
          if mobj is not None:
              mobj.group(1) + ' ' + mobj.group(2)
              upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
  
+        view_count = str_to_int(self._search_regex(
+            r'"mv_views_count_number"[^>]*>([\d,.]+) views<',
+            info_page, 'view count', fatal=False))
+
          formats = [{
              'format_id': k,
              'url': v,
@@ -210,6 +220,7 @@ class VKIE(InfoExtractor):
              'uploader': data.get('md_author'),
              'duration': data.get('duration'),
              'upload_date': upload_date,
+            'view_count': view_count,
          }
  
  
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py

index 97b98bbe88715f644da6bec1709d697af2c8e0e0..ced3a10cd417840796d4240cfd1d8163f3148134 100644 (file)
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,123 +1,235 @@
  # coding: utf-8
-
  from __future__ import unicode_literals
  
-import math
-import random
-import re
-import time
+import base64
  
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..utils import ExtractorError
+
+from ..compat import (
+    compat_urllib_parse,
+    compat_ord,
+    compat_urllib_request,
  )
  
  
  class YoukuIE(InfoExtractor):
+    IE_NAME = 'youku'
      _VALID_URL = r'''(?x)
          (?:
              http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
              youku:)
          (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
      '''
-    _TEST = {
-        'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
-        'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
-        'params': {
-            'test': False
+
+    _TESTS = [{
+        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
+        'md5': '5f3af4192eabacc4501508d54a8cabd7',
+        'info_dict': {
+            'id': 'XMTc1ODE5Njcy_part1',
+            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
+            'ext': 'flv'
+        }
+    }, {
+        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
+        'only_matching': True,
+    }, {
+        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
+        'info_dict': {
+            'id': 'XODgxNjg1Mzk2',
+            'title': '武媚娘传奇 85',
          },
+        'playlist_count': 11,
+    }, {
+        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
          'info_dict': {
-            'id': 'XNDgyMDQ2NTQw_part00',
-            'ext': 'flv',
-            'title': 'youtube-dl test video "\'/\\ä↭𝕐'
+            'id': 'XMTI1OTczNDM5Mg',
+            'title': '花千骨 04',
+        },
+        'playlist_count': 13,
+        'skip': 'Available in China only',
+    }]
+
+    def construct_video_urls(self, data1, data2):
+        # get sid, token
+        def yk_t(s1, s2):
+            ls = list(range(256))
+            t = 0
+            for i in range(256):
+                t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
+                ls[i], ls[t] = ls[t], ls[i]
+            s = bytearray()
+            x, y = 0, 0
+            for i in range(len(s2)):
+                y = (y + 1) % 256
+                x = (x + ls[y]) % 256
+                ls[x], ls[y] = ls[y], ls[x]
+                s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
+            return bytes(s)
+
+        sid, token = yk_t(
+            b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
+        ).decode('ascii').split('_')
+
+        # get oip
+        oip = data2['ip']
+
+        # get fileid
+        string_ls = list(
+            'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
+        shuffled_string_ls = []
+        seed = data1['seed']
+        N = len(string_ls)
+        for ii in range(N):
+            seed = (seed * 0xd3 + 0x754f) % 0x10000
+            idx = seed * len(string_ls) // 0x10000
+            shuffled_string_ls.append(string_ls[idx])
+            del string_ls[idx]
+
+        fileid_dict = {}
+        for format in data1['streamtypes']:
+            streamfileid = [
+                int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
+            fileid = ''.join(
+                [shuffled_string_ls[i] for i in streamfileid])
+            fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
+
+        def get_fileid(format, n):
+            fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
+            return fileid
+
+        # get ep
+        def generate_ep(format, n):
+            fileid = get_fileid(format, n)
+            ep_t = yk_t(
+                b'bf7e5f01',
+                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
+            )
+            ep = base64.b64encode(ep_t).decode('ascii')
+            return ep
+
+        # generate video_urls
+        video_urls_dict = {}
+        for format in data1['streamtypes']:
+            video_urls = []
+            for dt in data1['segs'][format]:
+                n = str(int(dt['no']))
+                param = {
+                    'K': dt['k'],
+                    'hd': self.get_hd(format),
+                    'myp': 0,
+                    'ts': dt['seconds'],
+                    'ypp': 0,
+                    'ctype': 12,
+                    'ev': 1,
+                    'token': token,
+                    'oip': oip,
+                    'ep': generate_ep(format, n)
+                }
+                video_url = \
+                    'http://k.youku.com/player/getFlvPath/' + \
+                    'sid/' + sid + \
+                    '_' + str(int(n) + 1).zfill(2) + \
+                    '/st/' + self.parse_ext_l(format) + \
+                    '/fileid/' + get_fileid(format, n) + '?' + \
+                    compat_urllib_parse.urlencode(param)
+                video_urls.append(video_url)
+            video_urls_dict[format] = video_urls
+
+        return video_urls_dict
+
+    def get_hd(self, fm):
+        hd_id_dict = {
+            'flv': '0',
+            'mp4': '1',
+            'hd2': '2',
+            'hd3': '3',
+            '3gp': '0',
+            '3gphd': '1'
          }
-    }
-
-    def _gen_sid(self):
-        nowTime = int(time.time() * 1000)
-        random1 = random.randint(1000, 1998)
-        random2 = random.randint(1000, 9999)
-
-        return "%d%d%d" % (nowTime, random1, random2)
-
-    def _get_file_ID_mix_string(self, seed):
-        mixed = []
-        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
-        seed = float(seed)
-        for i in range(len(source)):
-            seed = (seed * 211 + 30031) % 65536
-            index = math.floor(seed / 65536 * len(source))
-            mixed.append(source[int(index)])
-            source.remove(source[int(index)])
-        # return ''.join(mixed)
-        return mixed
-
-    def _get_file_id(self, fileId, seed):
-        mixed = self._get_file_ID_mix_string(seed)
-        ids = fileId.split('*')
-        realId = []
-        for ch in ids:
-            if ch:
-                realId.append(mixed[int(ch)])
-        return ''.join(realId)
+        return hd_id_dict[fm]
+
+    def parse_ext_l(self, fm):
+        ext_dict = {
+            'flv': 'flv',
+            'mp4': 'mp4',
+            'hd2': 'flv',
+            'hd3': 'flv',
+            '3gp': 'flv',
+            '3gphd': 'mp4'
+        }
+        return ext_dict[fm]
+
+    def get_format_name(self, fm):
+        _dict = {
+            '3gp': 'h6',
+            '3gphd': 'h5',
+            'flv': 'h4',
+            'mp4': 'h3',
+            'hd2': 'h2',
+            'hd3': 'h1'
+        }
+        return _dict[fm]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
+        video_id = self._match_id(url)
  
-        config = self._download_json(info_url, video_id)
+        def retrieve_data(req_url, note):
+            req = compat_urllib_request.Request(req_url)
  
-        error_code = config['data'][0].get('error_code')
-        if error_code:
-            # -8 means blocked outside China.
-            error = config['data'][0].get('error')  # Chinese and English, separated by newline.
-            raise ExtractorError(error or 'Server reported error %i' % error_code,
-                                 expected=True)
+            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+            if cn_verification_proxy:
+                req.add_header('Ytdl-request-proxy', cn_verification_proxy)
  
-        video_title = config['data'][0]['title']
-        seed = config['data'][0]['seed']
+            raw_data = self._download_json(req, video_id, note=note)
+            return raw_data['data'][0]
  
-        format = self._downloader.params.get('format', None)
-        supported_format = list(config['data'][0]['streamfileids'].keys())
+        # request basic data
+        data1 = retrieve_data(
+            'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
+            'Downloading JSON metadata 1')
+        data2 = retrieve_data(
+            'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
+            'Downloading JSON metadata 2')
  
-        # TODO proper format selection
-        if format is None or format == 'best':
-            if 'hd2' in supported_format:
-                format = 'hd2'
+        error_code = data1.get('error_code')
+        if error_code:
+            error = data1.get('error')
+            if error is not None and '因版权原因无法观看此视频' in error:
+                raise ExtractorError(
+                    'Youku said: Sorry, this video is available in China only', expected=True)
              else:
-                format = 'flv'
-            ext = 'flv'
-        elif format == 'worst':
-            format = 'mp4'
-            ext = 'mp4'
-        else:
-            format = 'flv'
-            ext = 'flv'
-
-        fileid = config['data'][0]['streamfileids'][format]
-        keys = [s['k'] for s in config['data'][0]['segs'][format]]
-        # segs is usually a dictionary, but an empty *list* if an error occured.
-
-        files_info = []
-        sid = self._gen_sid()
-        fileid = self._get_file_id(fileid, seed)
-
-        # column 8,9 of fileid represent the segment number
-        # fileid[7:9] should be changed
-        for index, key in enumerate(keys):
-            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
-            download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
-
-            info = {
-                'id': '%s_part%02d' % (video_id, index),
-                'url': download_url,
-                'uploader': None,
-                'upload_date': None,
-                'title': video_title,
-                'ext': ext,
-            }
-            files_info.append(info)
-
-        return files_info
+                msg = 'Youku server reported error %i' % error_code
+                if error is not None:
+                    msg += ': ' + error
+                raise ExtractorError(msg)
+
+        title = data1['title']
+
+        # generate video_urls_dict
+        video_urls_dict = self.construct_video_urls(data1, data2)
+
+        # construct info
+        entries = [{
+            'id': '%s_part%d' % (video_id, i + 1),
+            'title': title,
+            'formats': [],
+            # some formats are not available for all parts, we have to detect
+            # which one has all
+        } for i in range(max(len(v) for v in data1['segs'].values()))]
+        for fm in data1['streamtypes']:
+            video_urls = video_urls_dict[fm]
+            for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
+                entry['formats'].append({
+                    'url': video_url,
+                    'format_id': self.get_format_name(fm),
+                    'ext': self.parse_ext_l(fm),
+                    'filesize': int(seg['size']),
+                })
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'title': title,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 0301682b8dd228cab336bc1e68eaf868660fd5c7..a3da56c1413494ffd73d523a042959af16956ff0 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -234,6 +234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '44': {'ext': 'webm', 'width': 854, 'height': 480},
          '45': {'ext': 'webm', 'width': 1280, 'height': 720},
          '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
+        '59': {'ext': 'mp4', 'width': 854, 'height': 480},
+        '78': {'ext': 'mp4', 'width': 854, 'height': 480},
  
  
          # 3d videos
@@ -785,7 +787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              s = mobj.group(1)
              dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
              return '/signature/%s' % dec_s
-        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
          dash_doc = self._download_xml(
              dash_manifest_url, video_id,
              note='Downloading DASH manifest',
@@ -1290,7 +1292,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
      def _extract_playlist(self, playlist_id):
          url = self._TEMPLATE_URL % playlist_id
          page = self._download_webpage(url, playlist_id)
-        more_widget_html = content_html = page
  
          for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
              match = match.strip()
@@ -1310,36 +1311,36 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                  self.report_warning('Youtube gives an alert message: ' + match)
  
          # Extract the video ids from the playlist pages
-        ids = []
-
-        for page_num in itertools.count(1):
-            matches = re.finditer(self._VIDEO_RE, content_html)
-            # We remove the duplicates and the link with index 0
-            # (it's not the first video of the playlist)
-            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
-            ids.extend(new_ids)
-
-            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-            if not mobj:
-                break
+        def _entries():
+            more_widget_html = content_html = page
+            for page_num in itertools.count(1):
+                matches = re.finditer(self._VIDEO_RE, content_html)
+                # We remove the duplicates and the link with index 0
+                # (it's not the first video of the playlist)
+                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
+                for vid_id in new_ids:
+                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
+
+                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+                if not mobj:
+                    break
  
-            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
-                'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape)
-            content_html = more['content_html']
-            if not content_html.strip():
-                # Some webpages show a "Load more" button but they don't
-                # have more videos
-                break
-            more_widget_html = more['load_more_widget_html']
+                more = self._download_json(
+                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                    'Downloading page #%s' % page_num,
+                    transform_source=uppercase_escape)
+                content_html = more['content_html']
+                if not content_html.strip():
+                    # Some webpages show a "Load more" button but they don't
+                    # have more videos
+                    break
+                more_widget_html = more['load_more_widget_html']
  
          playlist_title = self._html_search_regex(
              r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
              page, 'title')
  
-        url_results = self._ids_to_results(ids)
-        return self.playlist_result(url_results, playlist_id, playlist_title)
+        return self.playlist_result(_entries(), playlist_id, playlist_title)
  
      def _real_extract(self, url):
          # Extract playlist id
@@ -1399,6 +1400,24 @@ class YoutubeChannelIE(InfoExtractor):
          channel_id = self._match_id(url)
  
          url = self._TEMPLATE_URL % channel_id
+
+        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
+        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
+        # otherwise fallback on channel by page extraction
+        channel_page = self._download_webpage(
+            url + '?view=57', channel_id,
+            'Downloading channel page', fatal=False)
+        channel_playlist_id = self._html_search_meta(
+            'channelId', channel_page, 'channel id', default=None)
+        if not channel_playlist_id:
+            channel_playlist_id = self._search_regex(
+                r'data-channel-external-id="([^"]+)"',
+                channel_page, 'channel id', default=None)
+        if channel_playlist_id and channel_playlist_id.startswith('UC'):
+            playlist_id = 'UU' + channel_playlist_id[2:]
+            return self.url_result(
+                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
+
          channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
          autogenerated = re.search(r'''(?x)
                  class="[^"]*?(?:
@@ -1487,7 +1506,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
  
          for pagenum in itertools.count(1):
              url_query = {
-                'search_query': query,
+                'search_query': query.encode('utf-8'),
                  'page': pagenum,
                  'spf': 'navigate',
              }
diff --git a/youtube_dl/options.py b/youtube_dl/options.py

index 5a2315bd96ce0c6abfdf4a8bea65aa68e6fa370b..740458e51483f45f8d8474d68edaaac48b24941e 100644 (file)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -145,7 +145,7 @@ def parseOpts(overrideArguments=None):
      general.add_option(
          '--list-extractors',
          action='store_true', dest='list_extractors', default=False,
-        help='List all supported extractors and the URLs they would handle')
+        help='List all supported extractors')
      general.add_option(
          '--extractor-descriptions',
          action='store_true', dest='list_extractor_descriptions', default=False,
@@ -725,7 +725,7 @@ def parseOpts(overrideArguments=None):
          metavar='POLICY', dest='fixup', default='detect_or_warn',
          help='Automatically correct known faults of the file. '
               'One of never (do nothing), warn (only emit a warning), '
-             'detect_or_warn(the default; fix file if we can, warn otherwise)')
+             'detect_or_warn (the default; fix file if we can, warn otherwise)')
      postproc.add_option(
          '--prefer-avconv',
          action='store_false', dest='prefer_ffmpeg',
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py

index 8f825f7859058c9c40cd55e50ec9832a92858c32..e19dbf73d5fe36c602d9ffb83cd2d02ab39cb5e1 100644 (file)
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -35,6 +35,11 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
  
          thumbnail_filename = info['thumbnails'][-1]['filename']
  
+        if not os.path.exists(encodeFilename(thumbnail_filename)):
+            self._downloader.report_warning(
+                'Skipping embedding the thumbnail because the file is missing.')
+            return [], info
+
          if info['ext'] == 'mp3':
              options = [
                  '-c', 'copy', '-map', '0', '-map', '1',
@@ -49,7 +54,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
              os.remove(encodeFilename(filename))
              os.rename(encodeFilename(temp_filename), encodeFilename(filename))
  
-        elif info['ext'] == 'm4a':
+        elif info['ext'] in ['m4a', 'mp4']:
              if not check_executable('AtomicParsley', ['-v']):
                  raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
  
@@ -82,6 +87,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                  os.remove(encodeFilename(filename))
                  os.rename(encodeFilename(temp_filename), encodeFilename(filename))
          else:
-            raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.')
+            raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
  
          return [], info
diff --git a/youtube_dl/update.py b/youtube_dl/update.py

index de3169eef1d6ec29d82a60b2f4b6a68f49d7dd4e..fc7ac8305d71c8cce077ef3040cd0903ac9f09c5 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -50,7 +50,7 @@ def rsa_verify(message, signature, key):
  def update_self(to_screen, verbose):
      """Update the program file with the latest version from the repository"""
  
-    UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
+    UPDATE_URL = "https://rg3.github.io/youtube-dl/update/"
      VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
      JSON_URL = UPDATE_URL + 'versions.json'
      UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index b333851534e9edd9c75ff70ee4350874530ea8f7..34a13cb815d2c11f85f33f725d2f9bcac2a11865 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.05.20'
+__version__ = '2015.06.15'
author	Sergey M․ <dstftw@gmail.com>
	Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Fri, 19 Jun 2015 17:00:00 +0000 (23:00 +0600)
AUTHORS		patch \| blob \| history
README.md		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
test/test_aes.py		patch \| blob \| history
test/test_subtitles.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/aftonbladet.py		patch \| blob \| history
youtube_dl/extractor/bbccouk.py		patch \| blob \| history
youtube_dl/extractor/bilibili.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/cbs.py		patch \| blob \| history
youtube_dl/extractor/cinemassacre.py		patch \| blob \| history
youtube_dl/extractor/cnet.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/crunchyroll.py		patch \| blob \| history
youtube_dl/extractor/dailymotion.py		patch \| blob \| history
youtube_dl/extractor/discovery.py		patch \| blob \| history
youtube_dl/extractor/dramafever.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/empflix.py		patch \| blob \| history
youtube_dl/extractor/facebook.py		patch \| blob \| history
youtube_dl/extractor/firedrive.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/fivetv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/francetv.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/imgur.py		patch \| blob \| history
youtube_dl/extractor/instagram.py		patch \| blob \| history
youtube_dl/extractor/iprima.py		patch \| blob \| history
youtube_dl/extractor/iqiyi.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/izlesene.py		patch \| blob \| history
youtube_dl/extractor/kickstarter.py		patch \| blob \| history
youtube_dl/extractor/lifenews.py		patch \| blob \| history
youtube_dl/extractor/liveleak.py		patch \| blob \| history
youtube_dl/extractor/naver.py		patch \| blob \| history
youtube_dl/extractor/nfl.py		patch \| blob \| history
youtube_dl/extractor/niconico.py		patch \| blob \| history
youtube_dl/extractor/noco.py		patch \| blob \| history
youtube_dl/extractor/nova.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/nowtv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/odnoklassniki.py		patch \| blob \| history
youtube_dl/extractor/patreon.py		patch \| blob \| history
youtube_dl/extractor/porn91.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pornhub.py		patch \| blob \| history
youtube_dl/extractor/pornovoisines.py		patch \| blob \| history
youtube_dl/extractor/prosiebensat1.py		patch \| blob \| history
youtube_dl/extractor/qqmusic.py		patch \| blob \| history
youtube_dl/extractor/rtbf.py		patch \| blob \| history
youtube_dl/extractor/rtlnl.py		patch \| blob \| history
youtube_dl/extractor/rtlnow.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/ruutu.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/safari.py		patch \| blob \| history
youtube_dl/extractor/senateisvp.py		patch \| blob \| history
youtube_dl/extractor/sockshare.py	[deleted file]	patch \| blob \| history
youtube_dl/extractor/soompi.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/spankwire.py		patch \| blob \| history
youtube_dl/extractor/spiegeltv.py		patch \| blob \| history
youtube_dl/extractor/sunporno.py		patch \| blob \| history
youtube_dl/extractor/teamcoco.py		patch \| blob \| history
youtube_dl/extractor/tf1.py		patch \| blob \| history
youtube_dl/extractor/theplatform.py		patch \| blob \| history
youtube_dl/extractor/tlc.py		patch \| blob \| history
youtube_dl/extractor/tnaflix.py		patch \| blob \| history
youtube_dl/extractor/tube8.py		patch \| blob \| history
youtube_dl/extractor/tubitv.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tumblr.py		patch \| blob \| history
youtube_dl/extractor/turbo.py		patch \| blob \| history
youtube_dl/extractor/tvc.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/tvigle.py		patch \| blob \| history
youtube_dl/extractor/tvplay.py		patch \| blob \| history
youtube_dl/extractor/twentyfourvideo.py		patch \| blob \| history
youtube_dl/extractor/vbox7.py		patch \| blob \| history
youtube_dl/extractor/vgtv.py		patch \| blob \| history
youtube_dl/extractor/vidme.py		patch \| blob \| history
youtube_dl/extractor/viki.py		patch \| blob \| history
youtube_dl/extractor/vk.py		patch \| blob \| history
youtube_dl/extractor/youku.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/options.py		patch \| blob \| history
youtube_dl/postprocessor/embedthumbnail.py		patch \| blob \| history
youtube_dl/update.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history