141 lines
5 KiB
Python
141 lines
5 KiB
Python
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
from .common import (
|
|
InfoExtractor,
|
|
ExtractorError,
|
|
)
|
|
from ..utils import (
|
|
clean_html,
|
|
int_or_none,
|
|
str_or_none,
|
|
url_or_none,
|
|
)
|
|
|
|
import re
|
|
|
|
|
|
class WykopIE(InfoExtractor):
|
|
IE_NAME = 'wykop'
|
|
_VALID_URL = r'https?://(?:www\.)?wykop\.pl/(?P<type>link|wpis)/(?P<id>\d+)(?:/comment/\d+|/[^#/\s]+|/#comment-(?P<comment_id>\d+))*'
|
|
_TESTS = [{
|
|
'url': 'https://www.wykop.pl/link/5789155',
|
|
'info_dict': {
|
|
'id': '7b27ly', # streamable id
|
|
'title': 'Wypadek pijanych(prawdopodobnie) idiotów widziany z wnętrza samochodu.',
|
|
'ext': 'mp4',
|
|
'uploader': 'errorek95',
|
|
'timestamp': 1604847466.56506,
|
|
'upload_date': '20201108',
|
|
},
|
|
}, {
|
|
'url': 'https://www.wykop.pl/link/5787937/#comment-84100287',
|
|
'info_dict': {
|
|
'id': '2uz8kj',
|
|
'title': 'RETROWIRUS - ',
|
|
'ext': 'mp4',
|
|
'timestamp': 1604668080.99827,
|
|
'uploader': 'RETROWIRUS',
|
|
'upload_date': '20201107',
|
|
},
|
|
}, {
|
|
'url': 'https://www.wykop.pl/wpis/53405999',
|
|
'info_dict': {
|
|
'id': 'yeujjq', # streamable
|
|
'title': 'Potat - Chłop przerabia krzyż #heheszki #niewiemjaktootagowac',
|
|
'ext': 'mp4',
|
|
'uploader': 'Potat',
|
|
'upload_date': '20201108',
|
|
'timestamp': 1604825368.86073,
|
|
}
|
|
}, {
|
|
'url': 'https://www.wykop.pl/wpis/53415243/m00d-neuropa-bekazprawakow/#comment-189438995',
|
|
'info_dict': {
|
|
'id': 'jtxd8d',
|
|
'title': 'Nox_ - ( ͡° ͜ʖ ͡°)',
|
|
'ext': 'mp4',
|
|
'upload_date': '20201108',
|
|
'uploader': 'Nox_',
|
|
'timestamp': 1603830140.88605,
|
|
}
|
|
}, {
|
|
'url': 'https://wykop.pl/wpis/53404647/pokaz-spoiler/',
|
|
'only_matching': True,
|
|
}, {
|
|
'url': 'http://www.wykop.pl/wpis/53415243/#comment-189438995',
|
|
'only_matching': True,
|
|
}, {
|
|
'url': 'https://www.wykop.pl/link/5789155/wypadek-pijanych-prawdopodobnie-idiotow-widziany-z-wnetrza-samochodu/',
|
|
'only_matching': True,
|
|
}, {
|
|
'url': 'https://www.wykop.pl/link/5785947/comment/84053103/#comment-84053103',
|
|
'only_matching': True,
|
|
}]
|
|
|
|
def _real_extract(self, source_url):
|
|
mobj = re.match(self._VALID_URL, source_url)
|
|
id, comment_id = mobj.group('id', 'comment_id')
|
|
method_1 = 'links' if mobj.group('type') == 'link' else 'entries'
|
|
method_2 = 'comment' if comment_id \
|
|
else 'link' if method_1 == 'links' \
|
|
else 'entry'
|
|
|
|
meta = self._download_json(
|
|
'https://a2.wykop.pl/%s/%s/%s/appkey/aNd401dAPp' % (method_1, method_2, comment_id if comment_id else id),
|
|
comment_id or id)
|
|
|
|
if meta.get('error'):
|
|
error = meta['error']
|
|
raise ExtractorError('Wykop.pl said: "%s" (%d)' % (error['message_en'], error['code']))
|
|
|
|
data = meta['data']
|
|
|
|
uploader = uploader_url = alt_title = upload_date = None
|
|
# author can be null, just wypiek api things - https://www.wykop.pl/wpis/36527259/
|
|
if 'author' in data:
|
|
author = data['author']
|
|
uploader = author['login']
|
|
uploader_url = 'https://www.wykop.pl/ludzie/%s' % uploader if '.' not in uploader else None
|
|
|
|
if method_1 == 'entries' or method_2 == 'comment':
|
|
# links/link, entries/entry, entries/comment
|
|
if 'embed' not in data:
|
|
raise ExtractorError('No embed found in the %s' % method_2)
|
|
embed = data['embed']
|
|
if not embed['type'] == 'video':
|
|
raise ExtractorError('No video found in the %s' % method_2)
|
|
url = embed['url']
|
|
|
|
title = '%s - %s' % (uploader, clean_html(data.get('body') or ''))
|
|
else:
|
|
# links/comment
|
|
url = data['source_url']
|
|
title = clean_html(data['title'])
|
|
alt_title = clean_html(data['description'])
|
|
|
|
embed_or_data = data.get('embed') or data
|
|
age_limit = 18 if embed_or_data.get('plus18') else 0
|
|
thumbnail = url_or_none(embed_or_data.get('preview'))
|
|
like_count = int_or_none(data.get('vote_count'))
|
|
dislike_count = int_or_none(data.get('bury_count'))
|
|
comment_count = int_or_none(data.get('comments_count'))
|
|
date = str_or_none(data.get('date'))
|
|
if date:
|
|
upload_date = date[:4] + date[5:7] + date[8:10]
|
|
|
|
return {
|
|
'_type': 'url_transparent',
|
|
'url': url,
|
|
'title': title,
|
|
'alt_title': alt_title or None,
|
|
'uploader': uploader,
|
|
'uploader_url': uploader_url,
|
|
'thumbnail': thumbnail,
|
|
'upload_date': upload_date,
|
|
'age_limit': age_limit,
|
|
'like_count': like_count,
|
|
'dislike_count': dislike_count,
|
|
'comment_count': comment_count,
|
|
}
|