From f74f92f2fbfba0fa00904b3cdac557ca025ccc6d Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 20 Oct 2020 15:28:01 +0100 Subject: [PATCH] Change improve scantree performance with regex params of what to include and/or exclude. Change rename remove_file_failed to remove_file_perm and make it return an outcome. Change doctype for scantree as `Generator` to define both yield and return types. --- CHANGES.md | 2 + lib/sg_helpers.py | 20 ++++----- sickbeard/browser.py | 6 +-- sickbeard/db.py | 28 +++++------- sickbeard/helpers.py | 98 +++++++++++++++++++++--------------------- sickbeard/logger.py | 4 +- sickbeard/processTV.py | 2 +- sickbeard/providers/generic.py | 4 +- sickbeard/search.py | 2 +- sickbeard/show_name_helpers.py | 9 ++-- sickbeard/webserve.py | 39 ++++++++--------- 11 files changed, 99 insertions(+), 115 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 697079e..e4fce47 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,8 @@ * Change improve import shows listing performance * Change improve performance during show rescan process * Change improve performance during media processing +* Change improve scantree performance with regex params of what to include and/or exclude +* Change rename remove_file_failed to remove_file_perm and make it return an outcome * Add config/General/Updates/Alias Process button, minimum interval for a fetch of custom names/numbering is 30 mins * Add Export alternatives button to edit show * Change season specific alt names now available not just for anime diff --git a/lib/sg_helpers.py b/lib/sg_helpers.py index d130e1a..8350044 100644 --- a/lib/sg_helpers.py +++ b/lib/sg_helpers.py @@ -1132,24 +1132,29 @@ def move_file(src_file, dest_file): ek.ek(os.unlink, src_file) -def remove_file_failed(filepath): +def remove_file_perm(filepath): + # type: (AnyStr) -> Optional[bool] """ Remove file :param filepath: Path and file name - :type filepath: AnyStr + :return True if filepath does not exist else None if no removal """ + if not ek.ek(os.path.exists, filepath): + return True for t in list_range(10): # total seconds to wait 0 - 9 = 45s over 10 iterations try: ek.ek(os.remove, filepath) except OSError as e: if getattr(e, 'winerror', 0) not in (5, 32): # 5=access denied (e.g. av), 32=another process has lock - break + logger.warning('Unable to delete %s: %r / %s' % (filepath, e, ex(e))) + return except (BaseException, Exception): pass time.sleep(t) if not ek.ek(os.path.exists, filepath): - break + return True + logger.warning('Unable to delete %s' % filepath) def remove_file(filepath, tree=False, prefix_failure='', log_level=logging.INFO): @@ -1365,10 +1370,5 @@ def compress_file(target, filename, prefer_7z=True, remove_source=True): logger.debug('traceback: %s' % ex(e)) return False if remove_source: - try: - remove_file_failed(target) - except (BaseException, Exception) as e: - logger.error('error removing %s' % target) - logger.debug('traceback: %s' % ex(e)) - return False + remove_file_perm(target) return True diff --git a/sickbeard/browser.py b/sickbeard/browser.py index c229129..ded787c 100644 --- a/sickbeard/browser.py +++ b/sickbeard/browser.py @@ -115,12 +115,12 @@ def get_file_list(path, include_files): hide_names = [ # windows specific - 'boot', 'bootmgr', 'cache', 'config.msi', 'msocache', 'recovery', '$recycle.bin', 'recycler', + 'boot', 'bootmgr', 'cache', r'config\.msi', 'msocache', 'recovery', r'\$recycle\.bin', 'recycler', 'system volume information', 'temporary internet files', # osx specific - '.fseventd', '.spotlight', '.trashes', '.vol', 'cachedmessages', 'caches', 'trash', + r'\.fseventd', r'\.spotlight', r'\.trashes', r'\.vol', 'cachedmessages', 'caches', 'trash', # general - '.git'] + r'\.git'] # filter directories to protect for direntry in scantree(path, exclude=hide_names, filter_kind=not include_files, recurse=False) or []: diff --git a/sickbeard/db.py b/sickbeard/db.py index a841d28..dfd4b35 100644 --- a/sickbeard/db.py +++ b/sickbeard/db.py @@ -32,10 +32,12 @@ from exceptions_helper import ex import sickbeard from . import logger, sgdatetime +from .helpers import scantree from .sgdatetime import timestamp_near -from _23 import filter_iter, filter_list, list_values, scandir -from sg_helpers import make_dirs, compress_file, remove_file_failed +from sg_helpers import make_dirs, compress_file, remove_file_perm + +from _23 import filter_iter, list_values, scandir from six import iterkeys, iteritems, itervalues # noinspection PyUnreachableCode @@ -780,22 +782,12 @@ def delete_old_db_backups(target): :param target: backup folder to check """ - try: - if not ek.ek(os.path.isdir, target): - return - file_list = [f for f in ek.ek(scandir, target) if f.is_file()] - use_count = (1, sickbeard.BACKUP_DB_MAX_COUNT)[not sickbeard.BACKUP_DB_ONEDAY] - for filename in ['sickbeard', 'cache', 'failed']: - tb = filter_list(lambda fn: filename in fn.name, file_list) - if use_count < len(tb): - tb.sort(key=lambda f: f.stat(follow_symlinks=False).st_mtime, reverse=True) - for t in tb[use_count:]: - try: - remove_file_failed(t.path) - except (BaseException, Exception): - pass - except (BaseException, Exception): - pass + use_count = (1, sickbeard.BACKUP_DB_MAX_COUNT)[not sickbeard.BACKUP_DB_ONEDAY] + file_list = [f for f in scantree(target, include=['sickbeard|cache|failed'], filter_kind=False)] + if use_count < len(file_list): + file_list.sort(key=lambda _f: _f.stat(follow_symlinks=False).st_mtime, reverse=True) + for direntry in file_list[use_count:]: + remove_file_perm(direntry.path) def backup_all_dbs(target, compress=True, prefer_7z=True): diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index f939a99..a315f42 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -63,12 +63,12 @@ from six.moves import zip # noinspection PyUnresolvedReferences from sg_helpers import chmod_as_parent, clean_data, copy_file, fix_set_group_id, get_system_temp_dir, \ get_url, indent_xml, make_dirs, maybe_plural, md5_for_text, move_file, proxy_setting, remove_file, \ - remove_file_failed, replace_extension, try_int, try_ord, write_file + remove_file_perm, replace_extension, try_int, try_ord, write_file # noinspection PyUnreachableCode if False: # noinspection PyUnresolvedReferences - from typing import Any, AnyStr, Dict, NoReturn, Iterable, Iterator, List, Optional, Set, Tuple, Union + from typing import Any, AnyStr, Dict, Generator, NoReturn, Iterable, Iterator, List, Optional, Set, Tuple, Union from .tv import TVShow # the following workaround hack resolves a pyc resolution bug from .name_cache import retrieveNameFromCache @@ -361,10 +361,10 @@ def list_media_files(path): """ result = [] if path: - if '.sickgearignore' in [direntry.name for direntry in scantree(path, filter_kind=False, recurse=False)]: + if [direntry for direntry in scantree(path, include=[r'\.sickgearignore'], filter_kind=False, recurse=False)]: logger.log('Skipping folder "%s" because it contains ".sickgearignore"' % path, logger.DEBUG) else: - result = [direntry.path for direntry in scantree(path, filter_kind=False, exclude='Extras') + result = [direntry.path for direntry in scantree(path, exclude=['Extras'], filter_kind=False) if has_media_ext(direntry.name)] return result @@ -1086,7 +1086,7 @@ def download_file(url, filename, session=None, **kwargs): :rtype: bool """ if None is get_url(url, session=session, savename=filename, **kwargs): - remove_file_failed(filename) + remove_file_perm(filename) return False return True @@ -1099,26 +1099,21 @@ def clear_cache(force=False): :type force: bool """ # clean out cache directory, remove everything > 12 hours old - if sickbeard.CACHE_DIR: - logger.log(u'Trying to clean cache folder %s' % sickbeard.CACHE_DIR) + dirty = None + del_time = int(timestamp_near((datetime.datetime.now() - datetime.timedelta(hours=12)))) + direntry_args = dict(follow_symlinks=False) + for direntry in scantree(sickbeard.CACHE_DIR, ['images|rss|zoneinfo'], follow_symlinks=True): + if direntry.is_file(**direntry_args) and (force or del_time > direntry.stat(**direntry_args).st_mtime): + dirty = dirty or False if remove_file_perm(direntry.path) else True + elif direntry.is_dir(**direntry_args) and direntry.name not in ['cheetah', 'sessions', 'indexers']: + dirty = dirty or False + try: + ek.ek(os.rmdir, direntry.path) + except OSError: + dirty = True - # Does our cache_dir exists - if not ek.ek(os.path.isdir, sickbeard.CACHE_DIR): - logger.log(u'Skipping clean of non-existing folder: %s' % sickbeard.CACHE_DIR, logger.WARNING) - else: - exclude = ['rss', 'images', 'zoneinfo'] - del_time = int(timestamp_near((datetime.datetime.now() - datetime.timedelta(hours=12)))) - for f in scantree(sickbeard.CACHE_DIR, exclude, follow_symlinks=True): - if f.is_file(follow_symlinks=False) and (force or del_time > f.stat(follow_symlinks=False).st_mtime): - try: - ek.ek(os.remove, f.path) - except OSError as e: - logger.log('Unable to delete %s: %r / %s' % (f.path, e, ex(e)), logger.WARNING) - elif f.is_dir(follow_symlinks=False) and f.name not in ['cheetah', 'sessions', 'indexers']: - try: - ek.ek(os.rmdir, f.path) - except OSError: - pass + logger.log(u'%s from cache folder %s' % ((('Found items removed', 'Found items not removed')[dirty], + 'No items found to remove')[None is dirty], sickbeard.CACHE_DIR)) def human(size): @@ -1373,31 +1368,36 @@ def cpu_sleep(): def scantree(path, # type: AnyStr exclude=None, # type: Optional[AnyStr, List[AnyStr]] + include=None, # type: Optional[AnyStr, List[AnyStr]] follow_symlinks=False, # type: bool filter_kind=None, # type: Optional[bool] recurse=True # type: bool ): - # type: (...) -> Optional[Iterator[DirEntry], Iterable] - """yield DirEntry objects for given path. - :param path: Path to scan - :param exclude: Exclusions + # type: (...) -> Generator[DirEntry, None, None] + """Yield DirEntry objects for given path. Returns without yield if path fails sanity check + + :param path: Path to scan, sanity check is_dir and exists + :param exclude: Escaped regex string(s) to exclude + :param include: Escaped regex string(s) to include :param follow_symlinks: Follow symlinks - :param filter_kind: None to yield everything, True only yields directories, False only yields files - :param recurse: Recursively scan down the tree - :return: iter of results - """ - exclude = [x.lower() for x in (exclude, ([exclude], [])[None is exclude])[not isinstance(exclude, list)]] - for entry in ek.ek(scandir, path): - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - is_file = entry.is_file(follow_symlinks=follow_symlinks) - if entry.name.lower() not in exclude \ - and any([None is filter_kind, filter_kind and is_dir, - not filter_kind and is_dir and recurse, not filter_kind and is_file]): - if recurse and is_dir: - for subentry in scantree(entry.path, exclude, follow_symlinks, filter_kind): - yield subentry - if any([None is filter_kind, filter_kind and is_dir, not filter_kind and is_file]): - yield entry + :param filter_kind: None to yield everything, True yields directories, False yields files + :param recurse: Recursively scan the tree + """ + if isinstance(path, string_types) and path and ek.ek(os.path.isdir, path): + rc_exc, rc_inc = [re.compile(rx % '|'.join( + [x for x in (param, ([param], [])[None is param])[not isinstance(param, list)]])) + for rx, param in ((r'(?i)^(?:(?!%s).)*$', exclude), (r'(?i)%s', include))] + for entry in ek.ek(scandir, path): + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) + is_file = entry.is_file(follow_symlinks=follow_symlinks) + no_filter = any([None is filter_kind, filter_kind and is_dir, not filter_kind and is_file]) + if (rc_exc.search(entry.name), True)[not exclude] and (rc_inc.search(entry.name), True)[not include] \ + and (no_filter or (not filter_kind and is_dir and recurse)): + if recurse and is_dir: + for subentry in scantree(entry.path, exclude, include, follow_symlinks, filter_kind, recurse): + yield subentry + if no_filter: + yield entry def cleanup_cache(): @@ -1422,13 +1422,11 @@ def delete_not_changed_in(paths, days=30, minutes=0): del_time = int(timestamp_near((datetime.datetime.now() - datetime.timedelta(days=days, minutes=minutes)))) errors = 0 qualified = 0 - for c in (paths, [paths])[not isinstance(paths, list)]: + for cur_path in (paths, [paths])[not isinstance(paths, list)]: try: - for f in scantree(c): - if f.is_file(follow_symlinks=False) and del_time > f.stat(follow_symlinks=False).st_mtime: - try: - ek.ek(os.remove, f.path) - except (BaseException, Exception): + for direntry in scantree(cur_path, filter_kind=False): + if del_time > direntry.stat(follow_symlinks=False).st_mtime: + if not remove_file_perm(direntry.path): errors += 1 qualified += 1 except (BaseException, Exception): diff --git a/sickbeard/logger.py b/sickbeard/logger.py index bd37945..6c1b404 100644 --- a/sickbeard/logger.py +++ b/sickbeard/logger.py @@ -32,7 +32,7 @@ from logging.handlers import TimedRotatingFileHandler import sickbeard from . import classes from .sgdatetime import timestamp_near -from sg_helpers import md5_for_text, remove_file_failed +from sg_helpers import md5_for_text, remove_file_perm # noinspection PyUnreachableCode if False: @@ -383,7 +383,7 @@ class TimedCompressedRotatingFileHandler(TimedRotatingFileHandler): except OSError: pass else: - remove_file_failed(filepath) + remove_file_perm(filepath) sb_log_instance = SBRotatingLogHandler('sickgear.log') diff --git a/sickbeard/processTV.py b/sickbeard/processTV.py index 65eacfa..05a3a4a 100644 --- a/sickbeard/processTV.py +++ b/sickbeard/processTV.py @@ -137,7 +137,7 @@ class ProcessTVShow(object): if sickbeard.TV_DOWNLOAD_DIR and helpers.real_path(sickbeard.TV_DOWNLOAD_DIR) == helpers.real_path(folder): return False - # check if it's empty folder when wanted checked + # test if folder empty when check wanted if check_empty and len([direntry.path for direntry in scantree(folder, recurse=False)]): return False diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index 55c0a92..4600bb7 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -39,7 +39,7 @@ import sickbeard from .. import classes, db, helpers, logger, tvcache from ..classes import NZBSearchResult, TorrentSearchResult, SearchResult from ..common import Quality, MULTI_EP_RESULT, SEASON_RESULT, USER_AGENT -from ..helpers import maybe_plural, remove_file_failed +from ..helpers import maybe_plural, remove_file_perm from ..name_parser.parser import InvalidNameException, InvalidShowException, NameParser from ..scene_exceptions import has_season_exceptions from ..show_name_helpers import get_show_names_all_possible @@ -769,7 +769,7 @@ class GenericProvider(object): saved = True break - remove_file_failed(cache_file) + remove_file_perm(cache_file) if 'Referer' in self.session.headers: if ref_state: diff --git a/sickbeard/search.py b/sickbeard/search.py index 0c89991..ffad4d1 100644 --- a/sickbeard/search.py +++ b/sickbeard/search.py @@ -163,7 +163,7 @@ def snatch_episode(result, end_status=SNATCHED): dl_result = clients.get_client_instance(sickbeard.TORRENT_METHOD)().send_torrent(result) if result.cache_filepath: - helpers.remove_file_failed(result.cache_filepath) + helpers.remove_file_perm(result.cache_filepath) else: logger.log(u'Unknown result type, unable to download it', logger.ERROR) dl_result = False diff --git a/sickbeard/show_name_helpers.py b/sickbeard/show_name_helpers.py index 9ddb822..e522625 100644 --- a/sickbeard/show_name_helpers.py +++ b/sickbeard/show_name_helpers.py @@ -443,14 +443,11 @@ def determineReleaseName(dir_name=None, nzb_name=None): file_types = ['*.nzb', '*.nfo'] for search in file_types: - - reg_expr = re.compile(fnmatch.translate(search), re.IGNORECASE) - results = [direntry.name for direntry in scantree(dir_name, filter_kind=False, recurse=False) - if reg_expr.search(direntry.name)] + results = [direntry.name for direntry in scantree(dir_name, include=[fnmatch.translate(search)], + filter_kind=False, recurse=False)] if 1 == len(results): - found_file = ek.ek(os.path.basename, results[0]) - found_file = found_file.rpartition('.')[0] + found_file = results[0].rpartition('.')[0] if pass_wordlist_checks(found_file): logger.log(u'Release name (%s) found from file (%s)' % (found_file, results[0])) return found_file.rpartition('.')[0] diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index 880242a..02fabe3 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -52,7 +52,7 @@ from .anime import AniGroupList, pull_anidb_groups, short_group_names from .browser import folders_at_path from .common import ARCHIVED, DOWNLOADED, FAILED, IGNORED, SKIPPED, SNATCHED, SNATCHED_ANY, UNAIRED, UNKNOWN, WANTED, \ SD, HD720p, HD1080p, UHD2160p, Overview, Quality, qualityPresetStrings, statusStrings -from .helpers import has_image_ext, remove_article, scantree, starify +from .helpers import has_image_ext, remove_article, remove_file_perm, scantree, starify from .indexermapper import MapStatus, map_indexers_to_show, save_mapping from .indexers.indexer_config import TVINFO_IMDB, TVINFO_TRAKT, TVINFO_TVDB from .name_parser.parser import InvalidNameException, InvalidShowException, NameParser @@ -499,12 +499,8 @@ class RepoHandler(BaseStaticFileHandler): def save_zip(name, version, zip_path, zip_method): zip_name = '%s-%s.zip' % (name, version) zip_file = ek.ek(os.path.join, zip_path, zip_name) - for f in helpers.scantree(zip_path, ['resources']): - if f.is_file(follow_symlinks=False) and f.name[-4:] in ('.zip', '.md5'): - try: - ek.ek(os.remove, f.path) - except OSError as e: - logger.log('Unable to delete %s: %r / %s' % (f.path, e, ex(e)), logger.WARNING) + for direntry in helpers.scantree(zip_path, ['resources'], [r'\.(?:md5|zip)$'], filter_kind=False): + remove_file_perm(direntry.path) zip_data = zip_method() with io.open(zip_file, 'wb') as zh: zh.write(zip_data) @@ -679,22 +675,21 @@ class RepoHandler(BaseStaticFileHandler): if sickbeard.ENV.get('DEVENV') and ek.ek(os.path.exists, devenv_src): helpers.copy_file(devenv_src, devenv_dst) else: - helpers.remove_file_failed(devenv_dst) + helpers.remove_file_perm(devenv_dst) - for f in helpers.scantree(zip_path): - if f.is_file(follow_symlinks=False) and f.name[-4:] not in '.xcf': - try: - infile = None - if 'service.sickgear.watchedstate.updater' in f.path and f.path.endswith('addon.xml'): - infile = self.get_watchedstate_updater_addon_xml() - if not infile: - with io.open(f.path, 'rb') as fh: - infile = fh.read() - - with zipfile.ZipFile(bfr, 'a') as zh: - zh.writestr(ek.ek(os.path.relpath, f.path, basepath), infile, zipfile.ZIP_DEFLATED) - except OSError as e: - logger.log('Unable to zip %s: %r / %s' % (f.path, e, ex(e)), logger.WARNING) + for direntry in helpers.scantree(zip_path, exclude=[r'\.xcf$'], filter_kind=False): + try: + infile = None + if 'service.sickgear.watchedstate.updater' in direntry.path and direntry.path.endswith('addon.xml'): + infile = self.get_watchedstate_updater_addon_xml() + if not infile: + with io.open(direntry.path, 'rb') as fh: + infile = fh.read() + + with zipfile.ZipFile(bfr, 'a') as zh: + zh.writestr(ek.ek(os.path.relpath, direntry.path, basepath), infile, zipfile.ZIP_DEFLATED) + except OSError as e: + logger.log('Unable to zip %s: %r / %s' % (direntry.path, e, ex(e)), logger.WARNING) zip_data = bfr.getvalue() bfr.close()