[wykop] add new extractor
This commit is contained in:
parent
5c6bcbf172
commit
0958b54441
|
@ -1423,6 +1423,7 @@ from .wsj import (
|
|||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .wykop import WykopIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
|
|
@ -1593,6 +1593,9 @@ class GenericIE(InfoExtractor):
|
|||
'uploader': 'Lake8737',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
'params': {
|
||||
'force_generic_extractor': True,
|
||||
},
|
||||
},
|
||||
# Another LiveLeak embed pattern (#13336)
|
||||
{
|
||||
|
|
141
haruhi_dl/extractor/wykop.py
Normal file
141
haruhi_dl/extractor/wykop.py
Normal file
|
@ -0,0 +1,141 @@
|
|||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class WykopIE(InfoExtractor):
|
||||
IE_NAME = 'wykop'
|
||||
_VALID_URL = r'https?://(?:www\.)?wykop\.pl/(?P<type>link|wpis)/(?P<id>\d+)(?:/comment/\d+|/[^#/\s]+|/#comment-(?P<comment_id>\d+))*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wykop.pl/link/5789155',
|
||||
'info_dict': {
|
||||
'id': '7b27ly', # streamable id
|
||||
'title': 'Wypadek pijanych(prawdopodobnie) idiotów widziany z wnętrza samochodu.',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'errorek95',
|
||||
'timestamp': 1604847466.56506,
|
||||
'upload_date': '20201108',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wykop.pl/link/5787937/#comment-84100287',
|
||||
'info_dict': {
|
||||
'id': '2uz8kj',
|
||||
'title': 'RETROWIRUS - ',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1604668080.99827,
|
||||
'uploader': 'RETROWIRUS',
|
||||
'upload_date': '20201107',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wykop.pl/wpis/53405999',
|
||||
'info_dict': {
|
||||
'id': 'yeujjq', # streamable
|
||||
'title': 'Potat - Chłop przerabia krzyż #heheszki #niewiemjaktootagowac',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Potat',
|
||||
'upload_date': '20201108',
|
||||
'timestamp': 1604825368.86073,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.wykop.pl/wpis/53415243/m00d-neuropa-bekazprawakow/#comment-189438995',
|
||||
'info_dict': {
|
||||
'id': 'jtxd8d',
|
||||
'title': 'Nox_ - ( ͡° ͜ʖ ͡°)',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20201108',
|
||||
'uploader': 'Nox_',
|
||||
'timestamp': 1603830140.88605,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://wykop.pl/wpis/53404647/pokaz-spoiler/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wykop.pl/wpis/53415243/#comment-189438995',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.wykop.pl/link/5789155/wypadek-pijanych-prawdopodobnie-idiotow-widziany-z-wnetrza-samochodu/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.wykop.pl/link/5785947/comment/84053103/#comment-84053103',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, source_url):
|
||||
mobj = re.match(self._VALID_URL, source_url)
|
||||
id, comment_id = mobj.group('id', 'comment_id')
|
||||
method_1 = 'links' if mobj.group('type') == 'link' else 'entries'
|
||||
method_2 = 'comment' if comment_id \
|
||||
else 'link' if method_1 == 'links' \
|
||||
else 'entry'
|
||||
|
||||
meta = self._download_json(
|
||||
'https://a2.wykop.pl/%s/%s/%s/appkey/aNd401dAPp' % (method_1, method_2, comment_id if comment_id else id),
|
||||
comment_id or id)
|
||||
|
||||
if meta.get('error'):
|
||||
error = meta['error']
|
||||
raise ExtractorError('Wykop.pl said: "%s" (%d)' % (error['message_en'], error['code']))
|
||||
|
||||
data = meta['data']
|
||||
self.to_screen(data)
|
||||
|
||||
uploader = uploader_url = alt_title = upload_date = None
|
||||
# author can be null, just wypiek api things - https://www.wykop.pl/wpis/36527259/
|
||||
if 'author' in data:
|
||||
author = data['author']
|
||||
uploader = author['login']
|
||||
uploader_url = 'https://www.wykop.pl/ludzie/%s' % uploader if '.' not in uploader else None
|
||||
|
||||
if method_1 == 'entries' or method_2 == 'comment':
|
||||
# links/link, entries/entry, entries/comment
|
||||
if 'embed' not in data:
|
||||
raise ExtractorError('No embed found in the %s' % method_2)
|
||||
embed = data['embed']
|
||||
if not embed['type'] == 'video':
|
||||
raise ExtractorError('No video found in the %s' % method_2)
|
||||
url = embed['url']
|
||||
|
||||
title = '%s - %s' % (uploader, clean_html(data.get('body') or ''))
|
||||
else:
|
||||
# links/comment
|
||||
url = data['source_url']
|
||||
title = clean_html(data['title'])
|
||||
alt_title = clean_html(data['description'])
|
||||
|
||||
embed_or_data = data.get('embed') or data
|
||||
age_limit = 18 if embed_or_data.get('plus18') else 0
|
||||
thumbnail = url_or_none(embed_or_data.get('preview'))
|
||||
like_count = int_or_none(data.get('vote_count'))
|
||||
dislike_count = int_or_none(data.get('bury_count'))
|
||||
comment_count = int_or_none(data.get('comments_count'))
|
||||
date = str_or_none(data.get('date'))
|
||||
if date:
|
||||
upload_date = date[:4] + date[5:7] + date[8:10]
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'alt_title': alt_title or None,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': age_limit,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
Loading…
Reference in a new issue