From f7e1a2a5eba318ba49abb556f86a1a0f63ee0142 Mon Sep 17 00:00:00 2001 From: Ruud Date: Sun, 9 Feb 2014 15:57:08 +0100 Subject: [PATCH] nosql --- couchpotato/core/helpers/request.py | 1 - couchpotato/core/media/movie/_base/main.py | 2 +- couchpotato/core/plugins/file/main.py | 32 +- couchpotato/core/plugins/manage/main.py | 10 +- couchpotato/core/plugins/profile/main.py | 16 +- couchpotato/core/plugins/quality/main.py | 11 +- couchpotato/core/plugins/release/index.py | 5 +- couchpotato/core/plugins/release/main.py | 71 ++--- couchpotato/core/plugins/scanner/main.py | 16 +- libs/scandir/.gitattributes | 1 + libs/scandir/.gitignore | 4 + libs/scandir/__init__.py | 0 libs/scandir/_scandir.c | 373 +++++++++++++++++++++++ libs/scandir/scandir.py | 456 +++++++++++++++++++++++++++++ 14 files changed, 915 insertions(+), 83 deletions(-) create mode 100644 libs/scandir/.gitattributes create mode 100644 libs/scandir/.gitignore create mode 100644 libs/scandir/__init__.py create mode 100644 libs/scandir/_scandir.c create mode 100644 libs/scandir/scandir.py diff --git a/couchpotato/core/helpers/request.py b/couchpotato/core/helpers/request.py index 523e46e..b59673f 100644 --- a/couchpotato/core/helpers/request.py +++ b/couchpotato/core/helpers/request.py @@ -1,5 +1,4 @@ from couchpotato.core.helpers.encoding import toUnicode -from couchpotato.core.helpers.variable import natcmp from urllib import unquote import re diff --git a/couchpotato/core/media/movie/_base/main.py b/couchpotato/core/media/movie/_base/main.py index 79ab97e..54efd67 100644 --- a/couchpotato/core/media/movie/_base/main.py +++ b/couchpotato/core/media/movie/_base/main.py @@ -45,7 +45,7 @@ class MovieBase(MovieTypeBase): addEvent('movie.update_info', self.updateInfo) addEvent('movie.update_release_dates', self.updateReleaseDate) - def add(self, params = None, force_readd = True, search_after = True, update_library = False, status = None): + def add(self, params = None, force_readd = True, search_after = True, status = None): if not params: params = {} if not params.get('identifier'): diff --git a/couchpotato/core/plugins/file/main.py b/couchpotato/core/plugins/file/main.py index 37aa728..9c862fc 100644 --- a/couchpotato/core/plugins/file/main.py +++ b/couchpotato/core/plugins/file/main.py @@ -1,12 +1,12 @@ -from couchpotato import get_session +from couchpotato import get_db from couchpotato.api import addApiView from couchpotato.core.event import addEvent from couchpotato.core.helpers.encoding import toUnicode from couchpotato.core.helpers.variable import md5, getExt from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin -from couchpotato.core.settings.model import File from couchpotato.environment import Env +from scandir import scandir from tornado.web import StaticFileHandler import os.path import time @@ -28,22 +28,30 @@ class FileManager(Plugin): 'return': {'type': 'file'} }) + addEvent('app.load', self.cleanup) + def cleanup(self): - # TODO: unused # Wait a bit after starting before cleanup - time.sleep(3) + time.sleep(2) log.debug('Cleaning up unused files') try: - db = get_session() - for root, dirs, walk_files in os.walk(Env.get('cache_dir')): - for filename in walk_files: - if os.path.splitext(filename)[1] in ['.png', '.jpg', '.jpeg']: - file_path = os.path.join(root, filename) - f = db.query(File).filter(File.path == toUnicode(file_path)).first() - if not f: - os.remove(file_path) + db = get_db() + cache_dir = Env.get('cache_dir') + medias = db.all('media', with_doc = True) + + files = [] + for media in medias: + file_dict = media['doc'].get('files', {}) + for x in file_dict.keys(): + files.extend(file_dict[x]) + + for file in scandir.scandir(cache_dir): + if os.path.splitext(file.name)[1] in ['.png', '.jpg', '.jpeg']: + file_path = os.path.join(cache_dir, file.name) + if toUnicode(file_path) not in files: + os.remove(file_path) except: log.error('Failed removing unused file: %s', traceback.format_exc()) diff --git a/couchpotato/core/plugins/manage/main.py b/couchpotato/core/plugins/manage/main.py index 1c734b1..2606500 100644 --- a/couchpotato/core/plugins/manage/main.py +++ b/couchpotato/core/plugins/manage/main.py @@ -48,6 +48,7 @@ class Manage(Plugin): if not Env.get('dev') and self.conf('startup_scan'): addEvent('app.load', self.updateLibraryQuick) + addEvent('app.load', self.updateLibrary) def getProgress(self, **kwargs): return { @@ -67,7 +68,7 @@ class Manage(Plugin): return self.updateLibrary(full = False) def updateLibrary(self, full = True): - last_update = float(Env.prop('manage.last_update', default = 0)) + last_update = 0 #float(Env.prop('manage.last_update', default = 0)) if self.in_progress: log.info('Already updating library: %s', self.in_progress) @@ -184,13 +185,12 @@ class Manage(Plugin): 'to_go': total_found, }) - if group['media'] and group['media'].get('identifier'): - identifier = group['media'].get('identifier') - added_identifiers.append(identifier) + if group['media'] and group['identifier']: + added_identifiers.append(group['identifier']) # Add it to release and update the info fireEvent('release.add', group = group) - fireEvent('movie.update_info', identifier = identifier, on_complete = self.createAfterUpdate(folder, identifier)) + fireEvent('movie.update_info', identifier = group['identifier'], on_complete = self.createAfterUpdate(folder, group['identifier'])) else: self.updateProgress(folder) diff --git a/couchpotato/core/plugins/profile/main.py b/couchpotato/core/plugins/profile/main.py index 1c3ec63..53a20d5 100644 --- a/couchpotato/core/plugins/profile/main.py +++ b/couchpotato/core/plugins/profile/main.py @@ -1,13 +1,11 @@ import traceback -from couchpotato import get_session, get_db, tryInt +from couchpotato import get_db, tryInt from couchpotato.api import addApiView from couchpotato.core.event import addEvent from couchpotato.core.helpers.encoding import toUnicode -from couchpotato.core.helpers.variable import splitString from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from .index import ProfileIndex -from couchpotato.core.settings.model import Profile, ProfileType log = CPLog(__name__) @@ -82,6 +80,7 @@ class ProfilePlugin(Plugin): db = get_db() profile = { + '_t': 'profile', 'label': toUnicode(kwargs.get('label')), 'order': tryInt(kwargs.get('order', 999)), 'core': kwargs.get('core', False), @@ -101,11 +100,11 @@ class ProfilePlugin(Plugin): id = kwargs.get('id') try: p = db.get('id', id) + profile['order'] = tryInt(kwargs.get('order', p.get('order', 999))) except: p = db.insert(profile) - p.update(profile) - p['order'] = tryInt(kwargs.get('order', p.get('order', 999))) + p.update(profile) db.update(p) return { @@ -151,15 +150,14 @@ class ProfilePlugin(Plugin): def delete(self, id = None, **kwargs): try: - db = get_session() + db = get_db() success = False message = '' - try: - p = db.query(Profile).filter_by(id = id).first() + try: + p = db.get('id', id) db.delete(p) - db.commit() # Force defaults on all empty profile movies self.forceDefaults() diff --git a/couchpotato/core/plugins/quality/main.py b/couchpotato/core/plugins/quality/main.py index 265cdf2..9bd76eb 100644 --- a/couchpotato/core/plugins/quality/main.py +++ b/couchpotato/core/plugins/quality/main.py @@ -1,5 +1,5 @@ import traceback -from couchpotato import get_session, get_db +from couchpotato import get_db from couchpotato.api import addApiView from couchpotato.core.event import addEvent from couchpotato.core.helpers.encoding import toUnicode, ss @@ -7,7 +7,6 @@ from couchpotato.core.helpers.variable import mergeDicts, getExt from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.quality.index import QualityIndex -from couchpotato.core.settings.model import Quality import re log = CPLog(__name__) @@ -124,12 +123,12 @@ class QualityPlugin(Plugin): def saveSize(self, **kwargs): try: - db = get_session() - quality = db.query(Quality).filter_by(identifier = kwargs.get('identifier')).first() + db = get_db() + quality = db.get('quality', kwargs.get('identifier'), with_doc = True) if quality: - setattr(quality, kwargs.get('value_type'), kwargs.get('value')) - db.commit() + quality['doc'][kwargs.get('value_type')] = kwargs.get('value') + db.update(quality['doc']) self.cached_qualities = None diff --git a/couchpotato/core/plugins/release/index.py b/couchpotato/core/plugins/release/index.py index 11c1256..2629fc6 100644 --- a/couchpotato/core/plugins/release/index.py +++ b/couchpotato/core/plugins/release/index.py @@ -9,11 +9,11 @@ class ReleaseIndex(TreeBasedIndex): super(ReleaseIndex, self).__init__(*args, **kwargs) def make_key(self, key): - return md5(key).hexdigest() + return key def make_key_value(self, data): if data.get('_t') == 'release' and data.get('media_id'): - return md5(data['media_id']).hexdigest(), {'media_id': data.get('media_id')} + return data['media_id'], None def run_for_media(self, db, media_id): for release in db.get_many('release', media_id, with_doc = True): @@ -49,6 +49,7 @@ class ReleaseIDIndex(TreeBasedIndex): super(ReleaseIDIndex, self).__init__(*args, **kwargs) def make_key(self, key): + print key return md5(key).hexdigest() def make_key_value(self, data): diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py index bd805fd..3623a1a 100644 --- a/couchpotato/core/plugins/release/main.py +++ b/couchpotato/core/plugins/release/main.py @@ -12,7 +12,6 @@ from couchpotato.core.settings.model import Release as Relea, Media, \ from couchpotato.environment import Env from inspect import ismethod, isfunction from sqlalchemy.exc import InterfaceError -from sqlalchemy.orm import joinedload_all from sqlalchemy.sql.expression import and_, or_ import os import time @@ -110,44 +109,43 @@ class Release(Plugin): def add(self, group): try: - db = get_session() + db = get_db() identifier = '%s.%s.%s' % (group['identifier'], group['meta_data'].get('audio', 'unknown'), group['meta_data']['quality']['identifier']) - # Add movie - media = db.query(Media).filter_by(library_id = group['library'].get('id')).first() - if not media: - media = Media( - library_id = group['library'].get('id'), - profile_id = 0, - status = 'done' - ) - db.add(media) - db.commit() + # Add movie if it doesn't exist + try: + media = db.get('media', group['identifier'], with_doc = True)['doc'] + media['status'] = 'done' + db.update(media) + except: + media = { + '_t': 'media', + 'identifier': group['identifier'], + 'profile_id': None, + 'status': 'done' + } + m = db.insert(media) + media.update(m) # Add Release - rel = db.query(Relea).filter( - or_( - Relea.identifier == identifier, - and_(Relea.identifier.startswith(group['identifier']), Relea.status == 'snatched') - ) - ).first() - if not rel: - rel = Relea( - identifier = identifier, - movie = media, - quality_id = group['meta_data']['quality'].get('id'), - status = 'done' - ) - db.add(rel) - db.commit() + release = { + '_t': 'release', + 'media_id': media['_id'], + 'identifier': identifier, + 'quality': group['meta_data']['quality'].get('identifier'), + 'status': 'done' + } + try: + r = db.get('release_identifier', identifier, with_doc = True)['doc'] + release.update(r) + db.update(release) + except: + r = db.insert(release) + release.update(r) # Add each file type - rel['files'] = [] - for type in group['files']: - for cur_file in group['files'][type]: - added_file = self.saveFile(cur_file, type = type) - rel['files'].append(added_file.get('id')) + release['files'] = dict((k, v) for k, v in group['files'].items() if v) fireEvent('media.restatus', media['_id']) @@ -157,15 +155,6 @@ class Release(Plugin): return False - def saveFile(self, filepath, type = 'unknown', include_media_info = False): - - # Check database and update/insert if necessary - return { - 'type': '%s_%s' % Scanner.file_types.get(type), - 'path': filepath, - 'part': fireEvent('scanner.partnumber', file, single = True), - } - def deleteView(self, id = None, **kwargs): return { diff --git a/couchpotato/core/plugins/scanner/main.py b/couchpotato/core/plugins/scanner/main.py index 744c7bd..7d38424 100644 --- a/couchpotato/core/plugins/scanner/main.py +++ b/couchpotato/core/plugins/scanner/main.py @@ -1,3 +1,4 @@ +from couchpotato import get_db from couchpotato.core.event import fireEvent, addEvent from couchpotato.core.helpers.encoding import toUnicode, simplifyString, sp from couchpotato.core.helpers.variable import getExt, getImdb, tryInt, \ @@ -6,6 +7,7 @@ from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from enzyme.exceptions import NoParserError, ParseError from guessit import guess_movie_info +from scandir import scandir from subliminal.videos import Video import enzyme import os @@ -123,7 +125,7 @@ class Scanner(Plugin): check_file_date = True try: files = [] - for root, dirs, walk_files in os.walk(folder): + for root, dirs, walk_files in scandir.walk(folder): files.extend([sp(os.path.join(root, filename)) for filename in walk_files]) # Break if CP wants to shut down @@ -138,7 +140,6 @@ class Scanner(Plugin): check_file_date = False files = [sp(x) for x in files] - for file_path in files: if not os.path.exists(file_path): @@ -418,7 +419,7 @@ class Scanner(Plugin): if not group['media']: log.error('Unable to determine media: %s', group['identifiers']) else: - group['identifier'] = group['media']['identifier'] + group['identifier'] = group['media']['imdb'] processed_movies[identifier] = group @@ -610,9 +611,12 @@ class Scanner(Plugin): log.debug('Identifier to short to use for search: %s', identifier) if imdb_id: - return fireEvent('library.add.movie', attrs = { - 'identifier': imdb_id - }, update_after = False, single = True) + try: + db = get_db() + return db.get('media', imdb_id, with_doc = True)['doc']['info'] + except: + log.debug('Movie "%s" not in library, just getting info', imdb_id) + return fireEvent('movie.info', identifier = imdb_id, merge = True, extended = False) log.error('No imdb_id found for %s. Add a NFO file with IMDB id or add the year to the filename.', group['identifiers']) return {} diff --git a/libs/scandir/.gitattributes b/libs/scandir/.gitattributes new file mode 100644 index 0000000..176a458 --- /dev/null +++ b/libs/scandir/.gitattributes @@ -0,0 +1 @@ +* text=auto diff --git a/libs/scandir/.gitignore b/libs/scandir/.gitignore new file mode 100644 index 0000000..48878c7 --- /dev/null +++ b/libs/scandir/.gitignore @@ -0,0 +1,4 @@ +*.pyc +*.pyd +benchtree +build diff --git a/libs/scandir/__init__.py b/libs/scandir/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/scandir/_scandir.c b/libs/scandir/_scandir.c new file mode 100644 index 0000000..26c81ed --- /dev/null +++ b/libs/scandir/_scandir.c @@ -0,0 +1,373 @@ +// scandir C speedups +// +// TODO: this is a work in progress! +// +// There's a fair bit of PY_MAJOR_VERSION boilerplate to support both Python 2 +// and Python 3 -- the structure of this is taken from here: +// http://docs.python.org/3.3/howto/cporting.html + +#include +#include + +#ifdef MS_WINDOWS +#include +#endif + +#if PY_MAJOR_VERSION >= 3 +#define INITERROR return NULL +#define FROM_LONG PyLong_FromLong +#define FROM_STRING PyUnicode_FromStringAndSize +#else +#define INITERROR return +#define FROM_LONG PyInt_FromLong +#define FROM_STRING PyString_FromStringAndSize +#endif + +#ifdef MS_WINDOWS + +static PyObject * +win32_error_unicode(char* function, Py_UNICODE* filename) +{ + errno = GetLastError(); + if (filename) + return PyErr_SetFromWindowsErrWithUnicodeFilename(errno, filename); + else + return PyErr_SetFromWindowsErr(errno); +} + +/* Below, we *know* that ugo+r is 0444 */ +#if _S_IREAD != 0400 +#error Unsupported C library +#endif +static int +attributes_to_mode(DWORD attr) +{ + int m = 0; + if (attr & FILE_ATTRIBUTE_DIRECTORY) + m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ + else + m |= _S_IFREG; + if (attr & FILE_ATTRIBUTE_READONLY) + m |= 0444; + else + m |= 0666; + if (attr & FILE_ATTRIBUTE_REPARSE_POINT) + m |= 0120000; // S_IFLNK + return m; +} + +double +filetime_to_time(FILETIME *filetime) +{ + const double SECONDS_BETWEEN_EPOCHS = 11644473600.0; + + unsigned long long total = (unsigned long long)filetime->dwHighDateTime << 32 | + (unsigned long long)filetime->dwLowDateTime; + return (double)total / 10000000.0 - SECONDS_BETWEEN_EPOCHS; +} + +static PyTypeObject StatResultType; + +static PyObject * +find_data_to_statresult(WIN32_FIND_DATAW *data) +{ + PY_LONG_LONG size; + PyObject *v = PyStructSequence_New(&StatResultType); + if (v == NULL) + return NULL; + + size = (PY_LONG_LONG)data->nFileSizeHigh << 32 | + (PY_LONG_LONG)data->nFileSizeLow; + + PyStructSequence_SET_ITEM(v, 0, FROM_LONG(attributes_to_mode(data->dwFileAttributes))); + PyStructSequence_SET_ITEM(v, 1, FROM_LONG(0)); + PyStructSequence_SET_ITEM(v, 2, FROM_LONG(0)); + PyStructSequence_SET_ITEM(v, 3, FROM_LONG(0)); + PyStructSequence_SET_ITEM(v, 4, FROM_LONG(0)); + PyStructSequence_SET_ITEM(v, 5, FROM_LONG(0)); + PyStructSequence_SET_ITEM(v, 6, PyLong_FromLongLong((PY_LONG_LONG)size)); + PyStructSequence_SET_ITEM(v, 7, PyFloat_FromDouble(filetime_to_time(&data->ftLastAccessTime))); + PyStructSequence_SET_ITEM(v, 8, PyFloat_FromDouble(filetime_to_time(&data->ftLastWriteTime))); + PyStructSequence_SET_ITEM(v, 9, PyFloat_FromDouble(filetime_to_time(&data->ftCreationTime))); + + if (PyErr_Occurred()) { + Py_DECREF(v); + return NULL; + } + + return v; +} + +static PyStructSequence_Field stat_result_fields[] = { + {"st_mode", "protection bits"}, + {"st_ino", "inode"}, + {"st_dev", "device"}, + {"st_nlink", "number of hard links"}, + {"st_uid", "user ID of owner"}, + {"st_gid", "group ID of owner"}, + {"st_size", "total size, in bytes"}, + {"st_atime", "time of last access"}, + {"st_mtime", "time of last modification"}, + {"st_ctime", "time of last change"}, + {0} +}; + +static PyStructSequence_Desc stat_result_desc = { + "stat_result", /* name */ + NULL, /* doc */ + stat_result_fields, + 10 +}; + +static PyObject * +scandir_helper(PyObject *self, PyObject *args) +{ + PyObject *d, *v; + HANDLE hFindFile; + BOOL result; + WIN32_FIND_DATAW wFileData; + Py_UNICODE *wnamebuf; + Py_ssize_t len; + PyObject *po; + PyObject *name_stat; + + if (!PyArg_ParseTuple(args, "U:scandir_helper", &po)) + return NULL; + + /* Overallocate for \\*.*\0 */ + len = PyUnicode_GET_SIZE(po); + wnamebuf = malloc((len + 5) * sizeof(wchar_t)); + if (!wnamebuf) { + PyErr_NoMemory(); + return NULL; + } + wcscpy(wnamebuf, PyUnicode_AS_UNICODE(po)); + if (len > 0) { + Py_UNICODE wch = wnamebuf[len-1]; + if (wch != L'/' && wch != L'\\' && wch != L':') + wnamebuf[len++] = L'\\'; + wcscpy(wnamebuf + len, L"*.*"); + } + if ((d = PyList_New(0)) == NULL) { + free(wnamebuf); + return NULL; + } + Py_BEGIN_ALLOW_THREADS + hFindFile = FindFirstFileW(wnamebuf, &wFileData); + Py_END_ALLOW_THREADS + if (hFindFile == INVALID_HANDLE_VALUE) { + int error = GetLastError(); + if (error == ERROR_FILE_NOT_FOUND) { + free(wnamebuf); + return d; + } + Py_DECREF(d); + win32_error_unicode("FindFirstFileW", wnamebuf); + free(wnamebuf); + return NULL; + } + do { + /* Skip over . and .. */ + if (wcscmp(wFileData.cFileName, L".") != 0 && + wcscmp(wFileData.cFileName, L"..") != 0) { + v = PyUnicode_FromUnicode(wFileData.cFileName, wcslen(wFileData.cFileName)); + if (v == NULL) { + Py_DECREF(d); + d = NULL; + break; + } + name_stat = Py_BuildValue("ON", v, find_data_to_statresult(&wFileData)); + if (name_stat == NULL) { + Py_DECREF(v); + Py_DECREF(d); + d = NULL; + break; + } + if (PyList_Append(d, name_stat) != 0) { + Py_DECREF(v); + Py_DECREF(d); + Py_DECREF(name_stat); + d = NULL; + break; + } + Py_DECREF(name_stat); + Py_DECREF(v); + } + Py_BEGIN_ALLOW_THREADS + result = FindNextFileW(hFindFile, &wFileData); + Py_END_ALLOW_THREADS + /* FindNextFile sets error to ERROR_NO_MORE_FILES if + it got to the end of the directory. */ + if (!result && GetLastError() != ERROR_NO_MORE_FILES) { + Py_DECREF(d); + win32_error_unicode("FindNextFileW", wnamebuf); + FindClose(hFindFile); + free(wnamebuf); + return NULL; + } + } while (result == TRUE); + + if (FindClose(hFindFile) == FALSE) { + Py_DECREF(d); + win32_error_unicode("FindClose", wnamebuf); + free(wnamebuf); + return NULL; + } + free(wnamebuf); + return d; +} + +#else // Linux / OS X + +#include +#define NAMLEN(dirent) strlen((dirent)->d_name) + +static PyObject * +posix_error_with_allocated_filename(char* name) +{ + PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name); + PyMem_Free(name); + return rc; +} + +static PyObject * +scandir_helper(PyObject *self, PyObject *args) +{ + char *name = NULL; + PyObject *d, *v, *name_type; + DIR *dirp; + struct dirent *ep; + int arg_is_unicode = 1; + + errno = 0; + if (!PyArg_ParseTuple(args, "U:scandir_helper", &v)) { + arg_is_unicode = 0; + PyErr_Clear(); + } + if (!PyArg_ParseTuple(args, "et:scandir_helper", Py_FileSystemDefaultEncoding, &name)) + return NULL; + Py_BEGIN_ALLOW_THREADS + dirp = opendir(name); + Py_END_ALLOW_THREADS + if (dirp == NULL) { + return posix_error_with_allocated_filename(name); + } + if ((d = PyList_New(0)) == NULL) { + Py_BEGIN_ALLOW_THREADS + closedir(dirp); + Py_END_ALLOW_THREADS + PyMem_Free(name); + return NULL; + } + for (;;) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + ep = readdir(dirp); + Py_END_ALLOW_THREADS + if (ep == NULL) { + if (errno == 0) { + break; + } else { + Py_BEGIN_ALLOW_THREADS + closedir(dirp); + Py_END_ALLOW_THREADS + Py_DECREF(d); + return posix_error_with_allocated_filename(name); + } + } + if (ep->d_name[0] == '.' && + (NAMLEN(ep) == 1 || + (ep->d_name[1] == '.' && NAMLEN(ep) == 2))) + continue; + v = FROM_STRING(ep->d_name, NAMLEN(ep)); + if (v == NULL) { + Py_DECREF(d); + d = NULL; + break; + } + if (arg_is_unicode) { + PyObject *w; + + w = PyUnicode_FromEncodedObject(v, + Py_FileSystemDefaultEncoding, + "strict"); + if (w != NULL) { + Py_DECREF(v); + v = w; + } + else { + /* fall back to the original byte string, as + discussed in patch #683592 */ + PyErr_Clear(); + } + } + name_type = Py_BuildValue("ON", v, FROM_LONG(ep->d_type)); + if (name_type == NULL) { + Py_DECREF(v); + Py_DECREF(d); + d = NULL; + break; + } + if (PyList_Append(d, name_type) != 0) { + Py_DECREF(v); + Py_DECREF(d); + Py_DECREF(name_type); + d = NULL; + break; + } + Py_DECREF(name_type); + Py_DECREF(v); + } + Py_BEGIN_ALLOW_THREADS + closedir(dirp); + Py_END_ALLOW_THREADS + PyMem_Free(name); + + return d; +} + +#endif + +static PyMethodDef scandir_methods[] = { + {"scandir_helper", (PyCFunction)scandir_helper, METH_VARARGS, NULL}, + {NULL, NULL}, +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_scandir", + NULL, + 0, + scandir_methods, + NULL, + NULL, + NULL, + NULL, +}; +#endif + +#if PY_MAJOR_VERSION >= 3 +PyObject * +PyInit__scandir(void) +{ + PyObject *module = PyModule_Create(&moduledef); +#else +void +init_scandir(void) +{ + PyObject *module = Py_InitModule("_scandir", scandir_methods); +#endif + if (module == NULL) { + INITERROR; + } + +#ifdef MS_WINDOWS + stat_result_desc.name = "scandir.stat_result"; + PyStructSequence_InitType(&StatResultType, &stat_result_desc); +#endif + +#if PY_MAJOR_VERSION >= 3 + return module; +#endif +} diff --git a/libs/scandir/scandir.py b/libs/scandir/scandir.py new file mode 100644 index 0000000..1e34f8e --- /dev/null +++ b/libs/scandir/scandir.py @@ -0,0 +1,456 @@ +"""scandir, a better directory iterator that exposes all file info OS provides + +scandir is a generator version of os.listdir() that returns an iterator over +files in a directory, and also exposes the extra information most OSes provide +while iterating files in a directory. + +See README.md or https://github.com/benhoyt/scandir for rationale and docs. + +scandir is released under the new BSD 3-clause license. See LICENSE.txt for +the full license text. +""" + +from __future__ import division + +import ctypes +import os +import stat +import sys + +__version__ = '0.3' +__all__ = ['scandir', 'walk'] + +# Shortcuts to these functions for speed and ease +join = os.path.join +lstat = os.lstat + +S_IFDIR = stat.S_IFDIR +S_IFREG = stat.S_IFREG +S_IFLNK = stat.S_IFLNK + +# 'unicode' isn't defined on Python 3 +try: + unicode +except NameError: + unicode = str + +_scandir = None + + +class GenericDirEntry(object): + __slots__ = ('name', '_lstat', '_path') + + def __init__(self, path, name): + self._path = path + self.name = name + self._lstat = None + + def lstat(self): + if self._lstat is None: + self._lstat = lstat(join(self._path, self.name)) + return self._lstat + + def is_dir(self): + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFDIR + + def is_file(self): + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFREG + + def is_symlink(self): + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFLNK + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + +if sys.platform == 'win32': + from ctypes import wintypes + + # Various constants from windows.h + INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value + ERROR_FILE_NOT_FOUND = 2 + ERROR_NO_MORE_FILES = 18 + FILE_ATTRIBUTE_READONLY = 1 + FILE_ATTRIBUTE_DIRECTORY = 16 + FILE_ATTRIBUTE_REPARSE_POINT = 1024 + + # Numer of seconds between 1601-01-01 and 1970-01-01 + SECONDS_BETWEEN_EPOCHS = 11644473600 + + kernel32 = ctypes.windll.kernel32 + + # ctypes wrappers for (wide string versions of) FindFirstFile, + # FindNextFile, and FindClose + FindFirstFile = kernel32.FindFirstFileW + FindFirstFile.argtypes = [ + wintypes.LPCWSTR, + ctypes.POINTER(wintypes.WIN32_FIND_DATAW), + ] + FindFirstFile.restype = wintypes.HANDLE + + FindNextFile = kernel32.FindNextFileW + FindNextFile.argtypes = [ + wintypes.HANDLE, + ctypes.POINTER(wintypes.WIN32_FIND_DATAW), + ] + FindNextFile.restype = wintypes.BOOL + + FindClose = kernel32.FindClose + FindClose.argtypes = [wintypes.HANDLE] + FindClose.restype = wintypes.BOOL + + def filetime_to_time(filetime): + """Convert Win32 FILETIME to time since Unix epoch in seconds.""" + total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime + return total / 10000000 - SECONDS_BETWEEN_EPOCHS + + def find_data_to_stat(data): + """Convert Win32 FIND_DATA struct to stat_result.""" + # First convert Win32 dwFileAttributes to st_mode + attributes = data.dwFileAttributes + st_mode = 0 + if attributes & FILE_ATTRIBUTE_DIRECTORY: + st_mode |= S_IFDIR | 0o111 + else: + st_mode |= S_IFREG + if attributes & FILE_ATTRIBUTE_READONLY: + st_mode |= 0o444 + else: + st_mode |= 0o666 + if attributes & FILE_ATTRIBUTE_REPARSE_POINT: + st_mode |= S_IFLNK + + st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow + st_atime = filetime_to_time(data.ftLastAccessTime) + st_mtime = filetime_to_time(data.ftLastWriteTime) + st_ctime = filetime_to_time(data.ftCreationTime) + + # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev, + # st_nlink, st_uid, st_gid + return os.stat_result((st_mode, 0, 0, 0, 0, 0, st_size, st_atime, + st_mtime, st_ctime)) + + class Win32DirEntry(object): + __slots__ = ('name', '_lstat', '_find_data') + + def __init__(self, name, find_data): + self.name = name + self._lstat = None + self._find_data = find_data + + def lstat(self): + if self._lstat is None: + # Lazily convert to stat object, because it's slow, and often + # we only need is_dir() etc + self._lstat = find_data_to_stat(self._find_data) + return self._lstat + + def is_dir(self): + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY != 0) + + def is_file(self): + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY == 0) + + def is_symlink(self): + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_REPARSE_POINT != 0) + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def win_error(error, filename): + exc = WindowsError(error, ctypes.FormatError(error)) + exc.filename = filename + return exc + + def scandir(path='.', windows_wildcard='*.*'): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. + """ + # Call FindFirstFile and handle errors + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + filename = join(path, windows_wildcard) + handle = FindFirstFile(filename, data_p) + if handle == INVALID_HANDLE_VALUE: + error = ctypes.GetLastError() + if error == ERROR_FILE_NOT_FOUND: + # No files, don't yield anything + return + raise win_error(error, path) + + # Call FindNextFile in a loop, stopping when no more files + try: + while True: + # Skip '.' and '..' (current and parent directory), but + # otherwise yield (filename, stat_result) tuple + name = data.cFileName + if name not in ('.', '..'): + yield Win32DirEntry(name, data) + + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + success = FindNextFile(handle, data_p) + if not success: + error = ctypes.GetLastError() + if error == ERROR_NO_MORE_FILES: + break + raise win_error(error, path) + finally: + if not FindClose(handle): + raise win_error(ctypes.GetLastError(), path) + + try: + import _scandir + + scandir_helper = _scandir.scandir_helper + + class Win32DirEntry(object): + __slots__ = ('name', '_lstat') + + def __init__(self, name, lstat): + self.name = name + self._lstat = lstat + + def lstat(self): + return self._lstat + + def is_dir(self): + return self._lstat.st_mode & 0o170000 == S_IFDIR + + def is_file(self): + return self._lstat.st_mode & 0o170000 == S_IFREG + + def is_symlink(self): + return self._lstat.st_mode & 0o170000 == S_IFLNK + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def scandir(path='.'): + for name, stat in scandir_helper(unicode(path)): + yield Win32DirEntry(name, stat) + + except ImportError: + pass + + +# Linux, OS X, and BSD implementation +elif sys.platform.startswith(('linux', 'darwin')) or 'bsd' in sys.platform: + import ctypes.util + + DIR_p = ctypes.c_void_p + + # Rather annoying how the dirent struct is slightly different on each + # platform. The only fields we care about are d_name and d_type. + class Dirent(ctypes.Structure): + if sys.platform.startswith('linux'): + _fields_ = ( + ('d_ino', ctypes.c_ulong), + ('d_off', ctypes.c_long), + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + else: + _fields_ = ( + ('d_ino', ctypes.c_uint32), # must be uint32, not ulong + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_namlen', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + + DT_UNKNOWN = 0 + DT_DIR = 4 + DT_REG = 8 + DT_LNK = 10 + + Dirent_p = ctypes.POINTER(Dirent) + Dirent_pp = ctypes.POINTER(Dirent_p) + + libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) + opendir = libc.opendir + opendir.argtypes = [ctypes.c_char_p] + opendir.restype = DIR_p + + readdir_r = libc.readdir_r + readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp] + readdir_r.restype = ctypes.c_int + + closedir = libc.closedir + closedir.argtypes = [DIR_p] + closedir.restype = ctypes.c_int + + file_system_encoding = sys.getfilesystemencoding() + + class PosixDirEntry(object): + __slots__ = ('name', '_d_type', '_lstat', '_path') + + def __init__(self, path, name, d_type): + self._path = path + self.name = name + self._d_type = d_type + self._lstat = None + + def lstat(self): + if self._lstat is None: + self._lstat = lstat(join(self._path, self.name)) + return self._lstat + + # Ridiculous duplication between these is* functions -- helps a little + # bit with os.walk() performance compared to calling another function. + def is_dir(self): + d_type = self._d_type + if d_type != DT_UNKNOWN: + return d_type == DT_DIR + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFDIR + + def is_file(self): + d_type = self._d_type + if d_type != DT_UNKNOWN: + return d_type == DT_REG + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFREG + + def is_symlink(self): + d_type = self._d_type + if d_type != DT_UNKNOWN: + return d_type == DT_LNK + try: + self.lstat() + except OSError: + return False + return self._lstat.st_mode & 0o170000 == S_IFLNK + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def posix_error(filename): + errno = ctypes.get_errno() + exc = OSError(errno, os.strerror(errno)) + exc.filename = filename + return exc + + def scandir(path='.'): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. + """ + dir_p = opendir(path.encode(file_system_encoding)) + if not dir_p: + raise posix_error(path) + try: + result = Dirent_p() + while True: + entry = Dirent() + if readdir_r(dir_p, entry, result): + raise posix_error(path) + if not result: + break + name = entry.d_name.decode(file_system_encoding) + if name not in ('.', '..'): + yield PosixDirEntry(path, name, entry.d_type) + finally: + if closedir(dir_p): + raise posix_error(path) + + try: + import _scandir + + scandir_helper = _scandir.scandir_helper + + def scandir(path='.'): + for name, d_type in scandir_helper(unicode(path)): + yield PosixDirEntry(path, name, d_type) + + except ImportError: + pass + + +# Some other system -- no d_type or stat information +else: + def scandir(path='.'): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. + """ + for name in os.listdir(path): + yield GenericDirEntry(path, name) + + +def walk(top, topdown=True, onerror=None, followlinks=False): + """Like os.walk(), but faster, as it uses scandir() internally.""" + # Determine which are files and which are directories + dirs = [] + nondirs = [] + try: + for entry in scandir(top): + if entry.is_dir(): + dirs.append(entry) + else: + nondirs.append(entry) + except OSError as error: + if onerror is not None: + onerror(error) + return + + # Yield before recursion if going top down + if topdown: + # Need to do some fancy footwork here as caller is allowed to modify + # dir_names, and we really want them to modify dirs (list of DirEntry + # objects) instead. Keep a mapping of entries keyed by name. + dir_names = [] + entries_by_name = {} + for entry in dirs: + dir_names.append(entry.name) + entries_by_name[entry.name] = entry + + yield top, dir_names, [e.name for e in nondirs] + + dirs = [] + for dir_name in dir_names: + entry = entries_by_name.get(dir_name) + if entry is None: + # Only happens when caller creates a new directory and adds it + # to dir_names + entry = GenericDirEntry(top, dir_name) + dirs.append(entry) + + # Recurse into sub-directories, following symbolic links if "followlinks" + for entry in dirs: + if followlinks or not entry.is_symlink(): + new_path = join(top, entry.name) + for x in walk(new_path, topdown, onerror, followlinks): + yield x + + # Yield before recursion if going bottom up + if not topdown: + yield top, [e.name for e in dirs], [e.name for e in nondirs]