[pornhub] refactor scraping protection workaround from PhantomJS to Playwright
This commit is contained in:
parent
88f5839a37
commit
95b061297c
|
@ -12,7 +12,7 @@ from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from .openload import PhantomJSwrapper
|
from ..playwright import PlaywrightHelper
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -27,6 +27,8 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class PornHubBaseIE(InfoExtractor):
|
class PornHubBaseIE(InfoExtractor):
|
||||||
|
_REQUIRES_PLAYWRIGHT = True
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||||
|
@ -41,8 +43,10 @@ class PornHubBaseIE(InfoExtractor):
|
||||||
url = (url_or_request.get_full_url()
|
url = (url_or_request.get_full_url()
|
||||||
if isinstance(url_or_request, compat_urllib_request.Request)
|
if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
else url_or_request)
|
else url_or_request)
|
||||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
display_id = args[1]
|
||||||
phantom.get(url, html=webpage)
|
pwh = PlaywrightHelper(self)
|
||||||
|
pwh.open_page(url, display_id, html=webpage)
|
||||||
|
pwh.browser_stop()
|
||||||
webpage, urlh = dl(*args, **kwargs)
|
webpage, urlh = dl(*args, **kwargs)
|
||||||
|
|
||||||
return webpage, urlh
|
return webpage, urlh
|
||||||
|
@ -53,14 +57,13 @@ class PornHubIE(PornHubBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '648719015',
|
'id': '648719015',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -79,14 +82,14 @@ class PornHubIE(PornHubBaseIE):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# non-ASCII title
|
# non-ASCII title
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5fc81436ca0ec',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1331683002',
|
'id': 'ph5fc81436ca0ec',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '重庆婷婷女王足交',
|
'title': '淨化魔物的少女-3',
|
||||||
'upload_date': '20150213',
|
'timestamp': 1606948060,
|
||||||
'timestamp': 1423804862,
|
'upload_date': '20201202',
|
||||||
'duration': 1753,
|
'uploader': 'jojobyby999',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
|
@ -100,13 +103,14 @@ class PornHubIE(PornHubBaseIE):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# subtitles
|
# subtitles
|
||||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a0daf3c370f6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ph5af5fef7c2aa7',
|
'id': 'ph5a0daf3c370f6',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
'title': 'VIXEN Hot Student Fucks Teacher',
|
||||||
'uploader': 'BFFs',
|
'upload_date': '20171116',
|
||||||
'duration': 622,
|
'timestamp': 1510946038,
|
||||||
|
'uploader': 'Vixen',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
|
@ -123,7 +127,6 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'This video has been disabled',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -152,6 +155,9 @@ class PornHubIE(PornHubBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhub.org/view_video.php?viewkey=ph5a0daf3c370f6',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -344,11 +350,11 @@ class PornHubIE(PornHubBaseIE):
|
||||||
webpage, 'uploader', default=None)
|
webpage, 'uploader', default=None)
|
||||||
|
|
||||||
view_count = self._extract_count(
|
view_count = self._extract_count(
|
||||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
r'<span class="count"(?: [^>]+)?>([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||||
like_count = self._extract_count(
|
like_count = self._extract_count(
|
||||||
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
r'<span class="votesUp"(?: [^>]+)?\s+data-rating="(\d+)"', webpage, 'like')
|
||||||
dislike_count = self._extract_count(
|
dislike_count = self._extract_count(
|
||||||
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
r'<span class="votesDown"(?: [^>]+)?\s+data-rating="(\d+)"', webpage, 'dislike')
|
||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
|
@ -422,7 +428,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||||
'playlist_mincount': 118,
|
'playlist_mincount': 118,
|
||||||
|
@ -605,7 +611,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
Loading…
Reference in a new issue