You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

478 lines
16 KiB

from couchpotato import get_session
from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.encoding import toUnicode
from couchpotato.core.helpers.variable import getExt
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
from couchpotato.core.settings.model import File, Library, Release, Movie
from couchpotato.environment import Env
from flask.helpers import json
from themoviedb.tmdb import opensubtitleHashFile
import os
import re
import subprocess
import traceback
# Module-level logger; the Scanner methods below report through it.
log = CPLog(__name__)
class Scanner(Plugin):
    """Scan a folder for movie files, group related files per movie and
    register the result (movie, release, files) in the database.

    ``scan`` is hooked to the ``app.load`` event in ``__init__``.
    """

    # Size thresholds in bytes.
    minimal_filesize = {
        'media': 314572800, # 300MB
        'trailer': 1048576, # 1MB
    }

    # Lower-case path fragments that cause a file to be skipped.
    ignored_in_path = ['_unpack', '_failed_', '_unknown_', '_exists_', '.appledouble', '.appledb', '.appledesktop', os.path.sep + '._', '.ds_store', 'cp.cpnfo'] #unpacking, smb-crap, hidden files
    ignore_names = ['extract', 'extracting', 'extracted', 'movie', 'movies', 'film', 'films']

    # File extensions (without dot) per kind of file.
    extensions = {
        'movie': ['mkv', 'wmv', 'avi', 'mpg', 'mpeg', 'mp4', 'm2ts', 'iso', 'img'],
        'dvd': ['vts_*', 'vob'],
        'nfo': ['nfo', 'txt', 'tag'],
        'subtitle': ['sub', 'srt', 'ssa', 'ass'],
        'subtitle_extra': ['idx'],
        'trailer': ['mov', 'mp4', 'flv']
    }

    # Maps a key of group['files'] to the type tuple handed to 'file.add'.
    # Keys without an entry here (e.g. 'leftover') are not saved.
    file_types = {
        'subtitle': ('subtitle', 'subtitle'),
        'trailer': ('video', 'trailer'),
        'nfo': ('nfo', 'nfo'),
        'movie': ('video', 'movie'),
        'backdrop': ('image', 'backdrop'),
    }

    codecs = {
        'audio': ['dts', 'ac3', 'ac3d', 'mp3'],
        'video': ['x264', 'divx', 'xvid']
    }

    source_media = {
        'bluray': ['bluray', 'blu-ray', 'brrip', 'br-rip'],
        'hddvd': ['hddvd', 'hd-dvd'],
        'dvd': ['dvd'],
        'hdtv': ['hdtv']
    }

    clean = r'(?i)[^\s](ac3|dts|custom|dc|divx|divx5|dsr|dsrip|dutch|dvd|dvdrip|dvdscr|dvdscreener|screener|dvdivx|cam|fragment|fs|hdtv|hdrip|hdtvrip|internal|limited|multisubs|ntsc|ogg|ogm|pal|pdtv|proper|repack|rerip|retail|r3|r5|bd5|se|svcd|swedish|german|read.nfo|nfofix|unrated|ws|telesync|ts|telecine|tc|brrip|bdrip|480p|480i|576p|576i|720p|720i|1080p|1080i|hrhd|hrhdtv|hddvd|bluray|x264|h264|xvid|xvidvd|xxx|www.www|cd[1-9]|\[.*\])[^\s]*'

    # The first four patterns capture the part in their only group; the last
    # three are loose suffix patterns with the part in the second group.
    multipart_regex = [
        r'[ _\.-]+cd[ _\.-]*([0-9a-d]+)', #*cd1
        r'[ _\.-]+dvd[ _\.-]*([0-9a-d]+)', #*dvd1
        r'[ _\.-]+part[ _\.-]*([0-9a-d]+)', #*part1.mkv
        r'[ _\.-]+dis[ck][ _\.-]*([0-9a-d]+)', #*disk1.mkv
        r'()[ _\.-]+([0-9]*[abcd]+)(\.....?)$',
        r'([a-z])([0-9]+)(\.....?)$',
        r'()([ab])(\.....?)$' #*a.mkv
    ]

    def __init__(self):
        # Run a scan when the 'app.load' event fires.
        addEvent('app.load', self.scan)

    def scan(self, folder = '/Volumes/Media/Test/'):
        """Scan ``folder`` and store every recognised movie in the database.

        Outline:
            Get all files
            For each file larger than 300MB (or part of a DVD structure)
                create movie "group", this is where all movie files will be grouped
                group multipart together
            For each movie group, attach the smaller files that belong to it
                by full identifier, then by file name, then by directory name
            For each group
                determine file types
                determine the movie
                add movie/release/files to the database
            Fire 'library.update' for every matched movie and, when the
            'cleanup_offline' setting is on, delete files that are no longer
            available.
        """
        movie_files, leftovers = self._collectFiles(folder)
        self._attachLeftovers(movie_files, leftovers, folder)

        # Open up the db
        db = get_session()
        folder_pattern = toUnicode(folder) + u'%%'

        # Mark all files as "offline" before adding them to the database (again)
        files_in_path = db.query(File).filter(File.path.like(folder_pattern))
        files_in_path.update({'available': 0}, synchronize_session = False)
        db.commit()

        update_after = []
        for identifier in movie_files:
            group = movie_files[identifier]
            self._sortGroupFiles(group)

            # Determine movie
            group['library'] = self.determineMovie(group)

            # Save to DB
            if group['library']:
                self.addRelease(group)

                # Add identifier for library update
                update_after.append(group['library'].get('identifier'))

        for identifier in update_after:
            fireEvent('library.update', identifier = identifier)

        # If cleanup option is enabled, remove offline files from database
        if self.conf('cleanup_offline'):
            offline_files = db.query(File).filter(File.path.like(folder_pattern)).filter_by(available = 0)
            for offline_file in offline_files:
                db.delete(offline_file)
            db.commit()

        db.remove()

    def _collectFiles(self, folder):
        """Walk ``folder``; return (movie groups keyed by identifier, set of small leftover files)."""
        movie_files = {}
        leftovers = []

        for root, dirs, files in os.walk(folder):
            for filename in files:
                file_path = os.path.join(root, filename)

                # Remove ignored files
                if not self.keepFile(file_path):
                    continue

                is_dvd_file = self.isDVDFile(file_path)
                try:
                    is_large = os.path.getsize(file_path) > self.minimal_filesize['media']
                except OSError:
                    # E.g. a broken symlink or a file removed during the walk.
                    log.error('Couldn\'t get filesize of %s.' % file_path)
                    continue

                if is_large or is_dvd_file: # Minimal 300MB files or is DVD file
                    identifier = self.createFileIdentifier(file_path, folder, exclude_filename = is_dvd_file)
                    if not movie_files.get(identifier):
                        movie_files[identifier] = {
                            'unsorted_files': [],
                            'identifiers': [],
                            'is_dvd': is_dvd_file,
                        }
                    movie_files[identifier]['unsorted_files'].append(file_path)
                else:
                    leftovers.append(file_path)

        return movie_files, set(leftovers)

    def _attachLeftovers(self, movie_files, leftovers, folder):
        """Move leftover files into the movie group whose identifier they contain."""
        id_handles = [
            None, # Attach files to group by identifier
            lambda x: os.path.split(x)[-1], # Attach files via filename of master_file name only
            os.path.dirname, # Attach files via master_file dirname
        ]

        for handler in id_handles:
            candidates = [(handler(group_id) if handler else group_id, group_id) for group_id in movie_files]

            # Reverse order, this prevents "Iron man 2" files from getting
            # grouped with "Iron man", as the longer identifier claims its
            # files first.
            for identifier, group_id in sorted(candidates, reverse = True):
                # An empty identifier is a substring of everything and would
                # swallow the whole leftover pile.
                if not identifier.strip(os.path.sep):
                    continue

                group = movie_files[group_id]
                if identifier not in group['identifiers']:
                    group['identifiers'].append(identifier)

                # Group the files based on the identifier
                found_files = self.getGroupFiles(identifier, folder, leftovers)
                group['unsorted_files'].extend(found_files)

                # Remove the found files from the leftover stack
                leftovers = leftovers - found_files

    def _sortGroupFiles(self, group):
        """Replace group['unsorted_files'] with group['files'] (per type) and add group['meta_data']."""
        unsorted_files = group['unsorted_files']

        # Group extra (and easy) files first
        images = self.getImages(unsorted_files)
        group['files'] = {
            'subtitle': self.getSubtitles(unsorted_files),
            'nfo': self.getNfo(unsorted_files),
            'trailer': self.getTrailers(unsorted_files),
            'backdrop': images['backdrop'],
            'leftover': set(unsorted_files),
        }

        # Media files
        if group['is_dvd']:
            group['files']['movie'] = self.getDVDFiles(unsorted_files)
        else:
            group['files']['movie'] = self.getMediaFiles(unsorted_files)
        group['meta_data'] = self.getMetaData(group['files']['movie'])

        # Whatever was not claimed by a specific type stays in 'leftover'.
        # 'leftover' itself is skipped, subtracting it would empty the set.
        for file_type, typed_files in group['files'].items():
            if file_type != 'leftover':
                group['files']['leftover'] -= set(typed_files)

        # Delete the unsorted list
        del group['unsorted_files']

    def addRelease(self, group):
        """Create the Movie and Release rows for ``group`` (when missing) and attach its files."""
        db = get_session()

        identifier = '%s.%s.%s' % (group['library']['identifier'], group['meta_data']['audio'], group['meta_data']['quality'])

        # Add movie
        done_status = fireEvent('status.get', 'done', single = True)
        movie = db.query(Movie).filter_by(library_id = group['library'].get('id')).first()
        if not movie:
            movie = Movie(
                library_id = group['library'].get('id'),
                profile_id = 0,
                status_id = done_status.get('id')
            )
            db.add(movie)
            db.commit()

        # Add release
        quality = fireEvent('quality.single', group['meta_data']['quality'], single = True)
        release = db.query(Release).filter_by(identifier = identifier).first()
        if not release:
            release = Release(
                identifier = identifier,
                movie = movie,
                quality_id = quality.get('id'),
                status_id = done_status.get('id')
            )
            db.add(release)
            db.commit()

        # Add each file type
        for file_type in group['files']:
            # Types without a file_types mapping (e.g. 'leftover') can't be saved.
            if file_type not in self.file_types:
                continue

            for file in group['files'][file_type]:
                try:
                    added_file = self.saveFile(file, type = file_type, include_media_info = file_type == 'movie')
                    added_file = db.query(File).filter_by(id = added_file.get('id')).one()
                    release.files.append(added_file)
                    db.commit()
                except Exception as e:
                    log.debug('Failed to attach "%s" to release: %s' % (file, e))

        db.remove()

    def getMetaData(self, files):
        """Return the meta data for a group of movie files.

        TODO: placeholder, ``files`` is not inspected yet (see getMeta);
        every group gets the same fixed values.
        """
        return {
            'audio': 'AC3',
            'quality': '720p',
            'quality_type': 'HD',
            'resolution_width': 1280,
            'resolution_height': 720
        }

    def getMeta(self, filename):
        """Run libs/getmeta.py on ``filename`` and return its JSON output decoded, or None on failure."""
        lib_dir = os.path.join(Env.get('app_dir'), 'libs')
        script = os.path.join(lib_dir, 'getmeta.py')

        p = subprocess.Popen(["python", script, filename], stdout = subprocess.PIPE, stderr = subprocess.PIPE, cwd = lib_dir)
        z = p.communicate()[0]

        try:
            meta = json.loads(z)
            log.info('Retrieved metainfo: %s' % meta)
            return meta
        except Exception as e:
            log.error('Couldn\'t get metadata from file: %s' % e)

    def determineMovie(self, group):
        """Find the imdb id for ``group`` and return the result of 'library.add', or False.

        Sources, in order: an nfo file, a movie file path already known in
        the database (overrides the nfo), a provider search per identifier.
        """
        imdb_id = None
        files = group['files']

        # Check and see if nfo contains the imdb-id
        for nfo_file in files.get('nfo', []):
            try:
                imdb_id = self.getImdb(nfo_file)
            except (IOError, OSError) as e:
                log.debug('Couldn\'t read nfo "%s": %s' % (nfo_file, e))
                continue
            if imdb_id: break

        # Check if path is already in db
        db = get_session()
        for file in files['movie']:
            f = db.query(File).filter_by(path = toUnicode(file)).first()
            try:
                imdb_id = f.library[0].identifier
                break
            except (AttributeError, IndexError, TypeError):
                # Unknown file, or known file without a library attached.
                pass
        db.remove()

        # Search based on identifiers
        if not imdb_id:
            for identifier in group['identifiers']:
                if len(identifier) > 2:
                    movie = fireEvent('provider.movie.search', q = identifier, merge = True, limit = 1)
                    if movie:
                        imdb_id = movie[0].get('imdb')
                        if imdb_id: break
                else:
                    log.debug('Identifier to short to use for search: %s' % identifier)

        if imdb_id:
            return fireEvent('library.add', attrs = {
                'identifier': imdb_id
            }, update_after = False, single = True)

        log.error('No imdb_id found for %s.' % group['identifiers'])
        return False

    def saveFile(self, file, type = 'unknown', include_media_info = False):
        """Register ``file`` through the 'file.add' event and return that event's result.

        ``type`` must be a key of ``file_types``, otherwise KeyError is raised.
        TODO: ``include_media_info`` is accepted but no media info is collected yet.
        """
        properties = {}

        # Check database and update/insert if necessary
        return fireEvent('file.add', path = file, part = self.getPartNumber(file), type = self.file_types[type], properties = properties, single = True)

    def getImdb(self, txt):
        """Return the first imdb id (``tt`` + at least 7 digits) in ``txt``, or False.

        When ``txt`` is the path of an existing file, the file content is searched instead.
        """
        if os.path.isfile(txt):
            with open(txt, 'r') as nfo:
                txt = nfo.read()

        m = re.search(r'(?P<id>tt[0-9]{7,})', txt)
        return m.group('id') if m else False

    def getMediaFiles(self, files):
        """Return the files with a movie extension that are between 300MB and 100000MB."""
        movie_exts = self.extensions['movie']
        return set(s for s in files if self.filesizeBetween(s, 300, 100000) and getExt(s.lower()) in movie_exts)

    def getDVDFiles(self, files):
        """Return the files that are part of a DVD structure."""
        return set(s for s in files if self.isDVDFile(s))

    def getSubtitles(self, files):
        """Return the files with a subtitle extension."""
        subtitle_exts = self.extensions['subtitle']
        return set(s for s in files if getExt(s.lower()) in subtitle_exts)

    def getNfo(self, files):
        """Return the files with an nfo extension."""
        nfo_exts = self.extensions['nfo']
        return set(s for s in files if getExt(s.lower()) in nfo_exts)

    def getTrailers(self, files):
        """Return the files with 'trailer' as a separate word in the path, between 2MB and 250MB."""
        return set(s for s in files if re.search(r'(^|[\W_])trailer\d*[\W_]', s.lower()) and self.filesizeBetween(s, 2, 250))

    def getImages(self, files):
        """Split the image files into {'backdrop': set, 'rest': set}."""
        image_exts = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tbn']
        image_files = set(s for s in files if getExt(s.lower()) in image_exts)

        images = {}

        # Fanart: 'fanart' or 'backdrop' as a separate word, smaller than 5MB
        images['backdrop'] = set(s for s in image_files if re.search(r'(^|[\W_])(fanart|backdrop)\d*[\W_]', s.lower()) and self.filesizeBetween(s, 0, 5))

        # Rest
        images['rest'] = image_files - images['backdrop']

        return images

    def isDVDFile(self, file):
        """Return True when the path contains 'vts_', 'video_ts' or 'audio_ts' (case-insensitive)."""
        lowered = file.lower()
        return any(needle in lowered for needle in ('vts_', 'video_ts', 'audio_ts'))

    def keepFile(self, file):
        """Return False for files in ignored paths and for sample files, True otherwise.

        There is no size filter here: small files (nfo, subtitles, artwork)
        have to survive so scan() can attach them to a movie.
        """
        lowered = file.lower()

        # ignoredpaths
        for i in self.ignored_in_path:
            if i in lowered:
                log.debug('Ignored "%s" contains "%s".' % (file, i))
                return False

        # Sample file
        if re.search(r'(^|[\W_])sample\d*[\W_]', lowered):
            log.debug('Is sample file "%s".' % file)
            return False

        # All is OK
        return True

    def filesizeBetween(self, file, min = 0, max = 100000):
        """Return True when the size of ``file`` lies strictly between ``min`` and ``max`` (both in MB).

        Returns False when the size can't be determined.
        """
        try:
            return (min * 1048576) < os.path.getsize(file) < (max * 1048576)
        except (OSError, TypeError):
            log.error('Couldn\'t get filesize of %s.' % file)

        return False

    def getGroupFiles(self, identifier, folder, file_pile):
        """Return the files of ``file_pile`` whose own identifier contains ``identifier``."""
        return set(s for s in file_pile if identifier in self.createFileIdentifier(s, folder))

    def createFileIdentifier(self, file_path, folder, exclude_filename = False):
        """Build the grouping identifier of a file.

        That is the path relative to ``folder``, without extension and
        without multipart marker. With ``exclude_filename`` the file name is
        cut off as well, the trailing separator is kept.
        """
        # root folder, only as prefix so an equal fragment deeper in the path stays
        if file_path.startswith(folder):
            identifier = file_path[len(folder):]
        else:
            identifier = file_path

        identifier = os.path.splitext(identifier)[0] # ext

        if exclude_filename:
            basename = os.path.split(identifier)[-1]
            identifier = identifier[:len(identifier) - len(basename)]

        identifier = self.removeMultipart(identifier) # multipart

        return identifier

    def removeMultipart(self, name):
        """Return ``name`` with the first matching multipart marker removed."""
        for regex in self.multipart_regex:
            try:
                found = re.sub(regex, '', name)
            except (TypeError, re.error):
                continue
            if found != name:
                return found
        return name

    def getPartNumber(self, name):
        """Return the part captured by a cd/dvd/part/disc marker in ``name``, or 1 when there is none."""
        for regex in self.multipart_regex:
            try:
                found = re.search(regex, name)
            except (TypeError, re.error):
                continue

            # Only the keyword patterns have the part as their single group.
            # The loose suffix patterns also match ordinary names such as
            # "x264.mkv", so they are not used for numbering.
            if found and found.re.groups == 1:
                return found.group(1)

        return 1