[aparat] Fix extraction

author Yen Chi Hsuan <yan12125@gmail.com>

Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)

committer Yen Chi Hsuan <yan12125@gmail.com>

Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)
author Yen Chi Hsuan <yan12125@gmail.com>
Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)
diff --git a/ChangeLog b/ChangeLog

index 32a96432bc200239ac632591e9829171f296bb0e..657ff3e48b324a9b6794700ecc113c4b0be51de1 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,7 @@ version <unreleased>
  
  Extractors
  * [kuwo:singer] Fix extraction
+* [aparat] Fix extraction
  
  version 2016.08.07
  
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py

index 63429780e8abf528165daf7e50a6317bce9a6c7d..025e29aa46fe5db97c323fa95d947470f1f2023a 100644 (file)
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -1,8 +1,6 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
@@ -15,7 +13,7 @@ class AparatIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.aparat.com/v/wP8On',
-        'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
+        'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
          'info_dict': {
              'id': 'wP8On',
              'ext': 'mp4',
@@ -31,13 +29,13 @@ class AparatIE(InfoExtractor):
          # Note: There is an easier-to-parse configuration at
          # http://www.aparat.com/video/video/config/videohash/%video_id
          # but the URL in there does not work
-        embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
-                     video_id + '/vt/frame')
+        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
          webpage = self._download_webpage(embed_url, video_id)
  
-        video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
-            r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
-        for i, video_url in enumerate(video_urls):
+        file_list = self._parse_json(self._search_regex(
+            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
+        for i, item in enumerate(file_list[0]):
+            video_url = item['file']
              req = HEADRequest(video_url)
              res = self._request_webpage(
                  req, video_id, note='Testing video URL %d' % i, errnote=False)
author	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)
committer	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 8 Aug 2016 04:59:07 +0000 (12:59 +0800)
ChangeLog		patch | blob | history
youtube_dl/extractor/aparat.py		patch | blob | history