_json_ld: add support for podcast objects
This commit is contained in:
parent
3a79666639
commit
abfbb7d014
|
@ -1264,8 +1264,10 @@ class InfoExtractor(object):
|
||||||
continue
|
continue
|
||||||
info[count_key] = interaction_count
|
info[count_key] = interaction_count
|
||||||
|
|
||||||
def extract_video_object(e):
|
media_object_types = ('MediaObject', 'VideoObject', 'AudioObject', 'MusicVideoObject')
|
||||||
assert e['@type'] == 'VideoObject'
|
|
||||||
|
def extract_media_object(e):
|
||||||
|
assert e['@type'] in media_object_types
|
||||||
thumbnails = e.get('thumbnailUrl') or e.get('thumbnailURL')
|
thumbnails = e.get('thumbnailUrl') or e.get('thumbnailURL')
|
||||||
if isinstance(thumbnails, compat_str):
|
if isinstance(thumbnails, compat_str):
|
||||||
thumbnails = [thumbnails]
|
thumbnails = [thumbnails]
|
||||||
|
@ -1293,8 +1295,8 @@ class InfoExtractor(object):
|
||||||
item_type = e.get('@type')
|
item_type = e.get('@type')
|
||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
continue
|
continue
|
||||||
if item_type in ('TVEpisode', 'Episode'):
|
if item_type in ('TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||||
episode_name = unescapeHTML(e.get('name'))
|
episode_name = unescapeHTML(e.get('name') or e.get('headline'))
|
||||||
info.update({
|
info.update({
|
||||||
'episode': episode_name,
|
'episode': episode_name,
|
||||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
|
@ -1309,7 +1311,7 @@ class InfoExtractor(object):
|
||||||
'season_number': int_or_none(part_of_season.get('seasonNumber')),
|
'season_number': int_or_none(part_of_season.get('seasonNumber')),
|
||||||
})
|
})
|
||||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries', 'PodcastSeries'):
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
elif item_type == 'Movie':
|
elif item_type == 'Movie':
|
||||||
info.update({
|
info.update({
|
||||||
|
@ -1324,15 +1326,16 @@ class InfoExtractor(object):
|
||||||
'title': unescapeHTML(e.get('headline')),
|
'title': unescapeHTML(e.get('headline')),
|
||||||
'description': unescapeHTML(e.get('articleBody')),
|
'description': unescapeHTML(e.get('articleBody')),
|
||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type in media_object_types:
|
||||||
extract_video_object(e)
|
extract_media_object(e)
|
||||||
if expected_type is None:
|
if expected_type is None:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
video = e.get('video')
|
for media_key in ('video', 'associatedMedia'):
|
||||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
media = e.get(media_key)
|
||||||
extract_video_object(video)
|
if isinstance(media, dict) and media.get('@type') in media_object_types:
|
||||||
|
extract_media_object(media)
|
||||||
if expected_type is None:
|
if expected_type is None:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in a new issue