youtube_dl/extractor/soundgasm.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8
   9 class SoundgasmIE(InfoExtractor):
  10     IE_NAME = 'soundgasm'
  11     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
  12     _TEST = {
  13         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
  14         'md5': '010082a2c802c5275bb00030743e75ad',
  15         'info_dict': {
  16             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
  17             'ext': 'm4a',
  18             'title': 'Piano sample',
  19             'description': 'Royalty Free Sample Music',
  20             'uploader': 'ytdl',
  21         }
  22     }
  23
  24     def _real_extract(self, url):
  25         mobj = re.match(self._VALID_URL, url)
  26         display_id = mobj.group('display_id')
  27
  28         webpage = self._download_webpage(url, display_id)
  29
  30         audio_url = self._html_search_regex(
  31             r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  32             'audio URL', group='url')
  33
  34         title = self._search_regex(
  35             r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
  36             webpage, 'title', default=display_id)
  37
  38         description = self._html_search_regex(
  39             (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
  40              r'(?s)<li>Description:\s(.*?)<\/li>'),
  41             webpage, 'description', fatal=False)
  42
  43         audio_id = self._search_regex(
  44             r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
  45
  46         return {
  47             'id': audio_id,
  48             'display_id': display_id,
  49             'url': audio_url,
  50             'vcodec': 'none',
  51             'title': title,
  52             'description': description,
  53             'uploader': mobj.group('user'),
  54         }
  55
  56
  57 class SoundgasmProfileIE(InfoExtractor):
  58     IE_NAME = 'soundgasm:profile'
  59     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
  60     _TEST = {
  61         'url': 'http://soundgasm.net/u/ytdl',
  62         'info_dict': {
  63             'id': 'ytdl',
  64         },
  65         'playlist_count': 1,
  66     }
  67
  68     def _real_extract(self, url):
  69         profile_id = self._match_id(url)
  70
  71         webpage = self._download_webpage(url, profile_id)
  72
  73         entries = [
  74             self.url_result(audio_url, 'Soundgasm')
  75             for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
  76
  77         return self.playlist_result(entries, profile_id)