transistorfm extractors

based on extractor by @asz: https://github.com/ytdl-org/youtube-dl/pull/28022
2021-02-24 12:20:49 +01:00 · 2021-02-24 12:20:49 +01:00 · 8d30f19740
parent 1c3ca4fe2c
commit 8d30f19740
2 changed files with 77 additions and 0 deletions
--- a/haruhi_dl/extractor/extractors.py
+++ b/haruhi_dl/extractor/extractors.py
@ -1213,6 +1213,10 @@ from .toongoggles import ToonGogglesIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
+from .transistorfm import (
+    TransistorFMIE,
+    TransistorFMShareIE,
+)
 from .trilulilu import TriluliluIE
 from .trunews import TruNewsIE
 from .trutv import TruTVIE
--- a/haruhi_dl/extractor/transistorfm.py
+++ b/haruhi_dl/extractor/transistorfm.py
@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class TransistorFMIE(InfoExtractor):
+    _VALID_URL = r'https://[^/]+\.transistor\.fm/episodes/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://makingcents.transistor.fm/episodes/the-tech-stock-bubble',
+        'info_dict': {
+            'id': 'the-tech-stock-bubble',
+            'ext': 'mp3',
+            'title': 'A little bit of Coin',
+            'description': 'Today we chat about the CRYPTOCURRENCY',
+            'uploader': 'Making Cent$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return {
+            'url': self._html_search_regex(r'<body\b[^>]+\bdata-default-episode-url="([^"]+)"', webpage, 'media url'),
+            'id': video_id,
+            'title': self._html_search_regex(r'<body\b[^>]+\bdata-default-episode-title="([^"]+)"', webpage, 'episode title'),
+            'description': self._html_search_meta('description', webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': self._og_search_property('site_name', webpage),
+        }
+
+
+class TransistorFMShareIE(InfoExtractor):
+    _VALID_URL = r'https://share\.transistor\.fm/s/(?P<id>[0-9a-f]{8})'
+    _TESTS = [{
+        'url': 'https://share.transistor.fm/s/e9d040c0',
+        'info_dict': {
+            'id': 'e9d040c0',
+            'ext': 'mp3',
+            'duration': 1132,
+            'artist': 'Батенька, да вы трансформер',
+            'title': 'Эпизод 19. Люди и фанатики',
+            'description': 'md5:cc2561a69442b97d7ea5c3d6351a3dd6',
+            'thumbnail': 'https://images.transistor.fm/file/transistor/images/episode/373966/medium_1602593993-artwork.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        data = self._parse_json(self._html_search_regex(
+            r'<div id="embed-app" data-episodes="([^"]+)"',
+            webpage, 'JSON data block'), video_id)
+
+        if not data:
+            raise ExtractorError('No episode found')
+
+        data = data[0]
+
+        return {
+            'url': data['trackable_media_url'],
+            'id': video_id,
+            'title': data['title'],
+            'description': data.get('formatted_summary'),
+            'thumbnail': data.get('artwork'),
+            'duration': data.get('duration'),
+            'artist': data.get('author'),
+        }