diff --git a/haruhi_dl/extractor/extractors.py b/haruhi_dl/extractor/extractors.py index 217bff4d0..7ee2c15ff 100644 --- a/haruhi_dl/extractor/extractors.py +++ b/haruhi_dl/extractor/extractors.py @@ -1423,6 +1423,7 @@ from .wsj import ( WSJArticleIE, ) from .wwe import WWEIE +from .wykop import WykopIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE diff --git a/haruhi_dl/extractor/generic.py b/haruhi_dl/extractor/generic.py index e4a34ca7a..85c8142a0 100644 --- a/haruhi_dl/extractor/generic.py +++ b/haruhi_dl/extractor/generic.py @@ -1593,6 +1593,9 @@ class GenericIE(InfoExtractor): 'uploader': 'Lake8737', }, 'add_ie': [LiveLeakIE.ie_key()], + 'params': { + 'force_generic_extractor': True, + }, }, # Another LiveLeak embed pattern (#13336) { diff --git a/haruhi_dl/extractor/wykop.py b/haruhi_dl/extractor/wykop.py new file mode 100644 index 000000000..91fe30045 --- /dev/null +++ b/haruhi_dl/extractor/wykop.py @@ -0,0 +1,141 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import ( + InfoExtractor, + ExtractorError, +) +from ..utils import ( + clean_html, + int_or_none, + str_or_none, + url_or_none, +) + +import re + + +class WykopIE(InfoExtractor): + IE_NAME = 'wykop' + _VALID_URL = r'https?://(?:www\.)?wykop\.pl/(?Plink|wpis)/(?P\d+)(?:/comment/\d+|/[^#/\s]+|/#comment-(?P\d+))*' + _TESTS = [{ + 'url': 'https://www.wykop.pl/link/5789155', + 'info_dict': { + 'id': '7b27ly', # streamable id + 'title': 'Wypadek pijanych(prawdopodobnie) idiotów widziany z wnętrza samochodu.', + 'ext': 'mp4', + 'uploader': 'errorek95', + 'timestamp': 1604847466.56506, + 'upload_date': '20201108', + }, + }, { + 'url': 'https://www.wykop.pl/link/5787937/#comment-84100287', + 'info_dict': { + 'id': '2uz8kj', + 'title': 'RETROWIRUS - ', + 'ext': 'mp4', + 'timestamp': 1604668080.99827, + 'uploader': 'RETROWIRUS', + 'upload_date': '20201107', + }, + }, { + 'url': 'https://www.wykop.pl/wpis/53405999', + 'info_dict': { + 'id': 'yeujjq', # streamable + 'title': 'Potat - Chłop przerabia krzyż #heheszki #niewiemjaktootagowac', + 'ext': 'mp4', + 'uploader': 'Potat', + 'upload_date': '20201108', + 'timestamp': 1604825368.86073, + } + }, { + 'url': 'https://www.wykop.pl/wpis/53415243/m00d-neuropa-bekazprawakow/#comment-189438995', + 'info_dict': { + 'id': 'jtxd8d', + 'title': 'Nox_ - ( ͡° ͜ʖ ͡°)', + 'ext': 'mp4', + 'upload_date': '20201108', + 'uploader': 'Nox_', + 'timestamp': 1603830140.88605, + } + }, { + 'url': 'https://wykop.pl/wpis/53404647/pokaz-spoiler/', + 'only_matching': True, + }, { + 'url': 'http://www.wykop.pl/wpis/53415243/#comment-189438995', + 'only_matching': True, + }, { + 'url': 'https://www.wykop.pl/link/5789155/wypadek-pijanych-prawdopodobnie-idiotow-widziany-z-wnetrza-samochodu/', + 'only_matching': True, + }, { + 'url': 'https://www.wykop.pl/link/5785947/comment/84053103/#comment-84053103', + 'only_matching': True, + }] + + def _real_extract(self, source_url): + mobj = re.match(self._VALID_URL, source_url) + id, comment_id = mobj.group('id', 'comment_id') + method_1 = 'links' if mobj.group('type') == 'link' else 'entries' + method_2 = 'comment' if comment_id \ + else 'link' if method_1 == 'links' \ + else 'entry' + + meta = self._download_json( + 'https://a2.wykop.pl/%s/%s/%s/appkey/aNd401dAPp' % (method_1, method_2, comment_id if comment_id else id), + comment_id or id) + + if meta.get('error'): + error = meta['error'] + raise ExtractorError('Wykop.pl said: "%s" (%d)' % (error['message_en'], error['code'])) + + data = meta['data'] + self.to_screen(data) + + uploader = uploader_url = alt_title = upload_date = None + # author can be null, just wypiek api things - https://www.wykop.pl/wpis/36527259/ + if 'author' in data: + author = data['author'] + uploader = author['login'] + uploader_url = 'https://www.wykop.pl/ludzie/%s' % uploader if '.' not in uploader else None + + if method_1 == 'entries' or method_2 == 'comment': + # links/link, entries/entry, entries/comment + if 'embed' not in data: + raise ExtractorError('No embed found in the %s' % method_2) + embed = data['embed'] + if not embed['type'] == 'video': + raise ExtractorError('No video found in the %s' % method_2) + url = embed['url'] + + title = '%s - %s' % (uploader, clean_html(data.get('body') or '')) + else: + # links/comment + url = data['source_url'] + title = clean_html(data['title']) + alt_title = clean_html(data['description']) + + embed_or_data = data.get('embed') or data + age_limit = 18 if embed_or_data.get('plus18') else 0 + thumbnail = url_or_none(embed_or_data.get('preview')) + like_count = int_or_none(data.get('vote_count')) + dislike_count = int_or_none(data.get('bury_count')) + comment_count = int_or_none(data.get('comments_count')) + date = str_or_none(data.get('date')) + if date: + upload_date = date[:4] + date[5:7] + date[8:10] + + return { + '_type': 'url_transparent', + 'url': url, + 'title': title, + 'alt_title': alt_title or None, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'age_limit': age_limit, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'comment_count': comment_count, + }