# encoding:utf-8
# author:Prinz23
# project:imdb_api

__author__ = 'Prinz23'
__version__ = '1.0'
__api_version__ = '1.0.0'

import logging
import re
from .imdb_exceptions import *
from exceptions_helper import ex
from six import iteritems
from bs4_parser import BS4Parser
from lib import imdbpie
from lib.tvinfo_base.exceptions import BaseTVinfoShownotfound
from lib.tvinfo_base import TVInfoBase, TVINFO_TRAKT, TVINFO_TMDB, TVINFO_TVDB, TVINFO_TVRAGE, TVINFO_IMDB, \
    Person, PersonGenders, TVINFO_TWITTER, TVINFO_FACEBOOK, TVINFO_WIKIPEDIA, TVINFO_INSTAGRAM, Character, TVInfoShow
from sg_helpers import get_url, try_int
from lib.dateutil.parser import parser

# noinspection PyUnreachableCode
if False:
    from typing import Any, AnyStr, Dict, List, Optional, Union
    from six import integer_types

tz_p = parser()
log = logging.getLogger('imdb.api')
log.addHandler(logging.NullHandler())


def _parse_date(value):
    """
    parse a date string with the module level dateutil parser

    :param value: raw date value, may be empty or None
    :return: parsed date, the unchanged value if it is falsy, or None if parsing fails
    """
    try:
        return value and tz_p.parse(value).date()
    except (BaseException, Exception):
        # same catch-all pattern as used throughout this file, kept unchanged
        return None


class IMDbIndexer(TVInfoBase):
    """
    person lookups (by IMDb id or by name) backed by imdbpie and the IMDb bio web page
    """
    # supported_id_searches = [TVINFO_IMDB]
    supported_person_id_searches = [TVINFO_IMDB]

    # noinspection PyUnusedLocal
    # noinspection PyDefaultArgument
    def __init__(self, *args, **kwargs):
        super(IMDbIndexer, self).__init__(*args, **kwargs)

    @staticmethod
    def _convert_person(person_obj, filmography=None, bio=None):
        """
        convert an imdbpie result into a Person

        Two input shapes are handled: a dict with an 'imdb_id' key (as passed by _search_person),
        which gives a minimal Person with only id and name, and otherwise a dict with 'id' and
        'base' keys, which gives a fully populated Person.

        :param person_obj: person data
        :param filmography: optional dict with a 'filmography' list, used to build characters
        :param bio: optional biography text, passed through to the Person
        :return: Person
        """
        if isinstance(person_obj, dict) and 'imdb_id' in person_obj:
            imdb_id = try_int(re.search(r'(\d+)', person_obj['imdb_id']).group(1))
            return Person(p_id=imdb_id, name=person_obj['name'], ids={TVINFO_IMDB: imdb_id})

        characters = []
        for known_for in (filmography and filmography['filmography']) or []:
            # only series credits are used, a credit without a title type is skipped instead of raising
            if known_for.get('titleType') not in ('tvSeries', 'tvMiniSeries'):
                continue
            # a credit without a usable numeric id is skipped instead of aborting the whole conversion
            id_match = re.search(r'(\d+)', known_for.get('id') or '')
            if not id_match:
                continue
            show_id = try_int(id_match.group(1))
            for character in known_for.get('characters') or []:
                # each character gets its own show object
                show = TVInfoShow()
                show.id = show_id
                show.ids.imdb = show.id
                show.seriesname = known_for.get('title')
                show.firstaired = known_for.get('year')
                characters.append(
                    Character(name=character, show=show, start_year=known_for.get('startYear'),
                              end_year=known_for.get('endYear'))
                )

        imdb_id = try_int(re.search(r'(\d+)', person_obj['id']).group(1))
        base = person_obj['base']
        return Person(p_id=imdb_id, name=base.get('name'), ids={TVINFO_IMDB: imdb_id},
                      gender=PersonGenders.imdb_map.get(base.get('gender'), PersonGenders.unknown),
                      # 'image' may be present with a None value, so a .get() default is not enough
                      image=(base.get('image') or {}).get('url'),
                      birthplace=base.get('birthPlace'),
                      birthdate=_parse_date(base.get('birthDate')),
                      deathdate=_parse_date(base.get('deathDate')),
                      height=base.get('heightCentimeters'), characters=characters,
                      deathplace=base.get('deathPlace'),
                      nicknames=set(base.get('nicknames') or []),
                      real_name=base.get('realName'),
                      akas=set(base.get('akas') or []), bio=bio
                      )

    def _search_person(self, name=None, ids=None):
        # type: (AnyStr, Dict[integer_types, integer_types]) -> List[Person]
        """
        search for person by name and/or ids

        :param name: name to search for
        :param ids: dict of ids to search
        :return: list of found persons
        """
        results, ids = [], ids or {}
        for tv_src in self.supported_person_id_searches:
            if tv_src in ids and TVINFO_IMDB == tv_src:
                try:
                    p = self.get_person(ids[tv_src])
                except (BaseException, Exception):
                    p = None
                if p:
                    results.append(p)
        if name:
            cache_name_key = 'p-name-%s' % name
            # NOTE(review): is_none presumably flags that None itself was cached - confirm in TVInfoBase
            is_none, ps = self._get_cache_entry(cache_name_key)
            if None is ps and not is_none:
                try:
                    ps = imdbpie.Imdb().search_for_name(name)
                except (BaseException, Exception):
                    ps = None
                self._set_cache_entry(cache_name_key, ps)
            if ps:
                for cp in ps:
                    # skip name matches that are already in results (from the id lookup or an earlier match)
                    if not any(cp['imdb_id'] == 'nm%07d' % c.id for c in results):
                        results.append(self._convert_person(cp))
        return results

    def _get_bio(self, p_id):
        """
        fetch the 'Mini Bio' paragraph from the IMDb bio page of a person

        :param p_id: numeric IMDb person id (without the 'nm' prefix)
        :return: bio text, or None if the page could not be fetched or parsed
        """
        try:
            bio = get_url('https://www.imdb.com/name/nm%07d/bio' % p_id, headers={'Accept-Language': 'en'})
            if not bio:
                return
            with BS4Parser(bio) as bio_item:
                bv = bio_item.find(string='Mini Bio', recursive=True).find_next('p')
                # unwrap links so that only their text remains
                for a in bv.findAll('a'):
                    a.replaceWithChildren()
                # turn html line breaks into newlines
                for b in bv.findAll('br'):
                    b.replaceWith('\n')
                return bv.get_text().strip()
        except (BaseException, Exception):
            return

    def get_person(self, p_id, get_show_credits=False, get_images=False, **kwargs):
        # type: (integer_types, bool, bool, Any) -> Optional[Person]
        """
        get person data for an IMDb person id, using the cache where possible

        :param p_id: numeric IMDb person id (without the 'nm' prefix)
        :param get_show_credits: also fetch the filmography to populate characters
        :param get_images: not used in this method
        :param kwargs: not used in this method
        :return: Person, or None if p_id is empty or no person data was found
        """
        if not p_id:
            return
        cache_main_key, cache_bio_key, cache_credits_key = 'p-main-%s' % p_id, 'p-bio-%s' % p_id, 'p-credits-%s' % p_id
        is_none, p = self._get_cache_entry(cache_main_key)
        if None is p and not is_none:
            try:
                p = imdbpie.Imdb().get_name(imdb_id='nm%07d' % p_id)
            except (BaseException, Exception):
                p = None
            self._set_cache_entry(cache_main_key, p)
        is_none, bio = self._get_cache_entry(cache_bio_key)
        if None is bio and not is_none:
            bio = self._get_bio(p_id)
            self._set_cache_entry(cache_bio_key, bio)
        fg = None
        if get_show_credits:
            is_none, fg = self._get_cache_entry(cache_credits_key)
            if None is fg and not is_none:
                try:
                    fg = imdbpie.Imdb().get_name_filmography(imdb_id='nm%07d' % p_id)
                except (BaseException, Exception):
                    fg = None
                self._set_cache_entry(cache_credits_key, fg)
        if p:
            return self._convert_person(p, filmography=fg, bio=bio)