CouchPotatoServer/libs/getmeta.py


								from flask.helpers import json

								from hachoir_core.cmd_line import unicodeFilename

								from hachoir_metadata import extractMetadata

								from hachoir_parser import createParser

								import datetime

								import re

								import sys


								def getMetadata(filename):

								    filename, realname = unicodeFilename(filename), filename

								    parser = createParser(filename, realname)

								    try:

								        metadata = extractMetadata(parser)

								    except:

								        return None


								    if metadata is not None:

								        metadata = metadata.exportPlaintext()

								        return metadata

								    return None


								def parseMetadata(meta, jsonsafe=True):

								    '''

								    Return a dict of section headings like 'Video stream' or 'Audio stream'.  Each key will have a list of dicts.

								    This supports multiple video/audio/subtitle/whatever streams per stream type.  Each element in the list of streams

								    will he a dict with keys like 'Image height' and 'Compression'...anything that hachoir is able to extract.


								    An example output:

								    {'Audio stream': [{u'Channel': u'6',

								                       u'Compression': u'A_AC3',

								                       u'Sample rate': u'48.0 kHz'}],

								     u'Common': [{u'Creation date': u'2008-03-20 09:09:43',

								                  u'Duration': u'1 hour 40 min 6 sec',

								                  u'Endianness': u'Big endian',

								                  u'MIME type': u'video/x-matroska',

								                  u'Producer': u'libebml v0.7.7 + libmatroska v0.8.1'}],

								     'Video stream': [{u'Compression': u'V_MPEG4/ISO/AVC',

								                       u'Image height': u'688 pixels',

								                       u'Image width': u'1280 pixels',

								                       u'Language': u'English'}]}

								    '''

								    if not meta:

								        return

								    sections = {}

								    what = []

								    for line in meta:

								        #if line doesn't start with "- " it is a section heading

								        if line[:2] != "- ":

								            section = line.strip(":").lower()


								            #lets collapse multiple stream headings into one...

								            search = re.search(r'#\d+\Z', section)

								            if search:

								                section = re.sub(search.group(), '', section).strip()


								            if section not in sections:

								                sections[section] = [dict()]

								            else:

								                sections[section].append(dict())

								        else:

								            #This isn't a section heading, so we put it in the last section heading we found.

								            #meta always starts out with a section heading so 'section' will always be defined

								            i = line.find(":")

								            key = line[2:i].lower()

								            value = _parseValue(section, key, line[i+2:])


								            if value is None:

								                value = line[i+2:]


								            if jsonsafe:

								                try:

								                    v = json.dumps(value)

								                except TypeError:

								                    value = str(value)


								            sections[section][-1][key] = value


								    return sections


								def _parseValue(section, key, value, jsonsafe = True):

								    '''

								    Tediously check all the types that we know about (checked over 7k videos to find these)

								    and convert them to python native types.


								    If jsonsafe is True, we'll make json-unfriendly types like datetime into json-friendly.

								    '''


								    date_search = re.search("\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d", value)


								    if key == 'bit rate':

								        ret = _parseBitRate(value.lower())

								    elif key == 'bits/sample' or key == 'bits/pixel':

								        try:

								            bits = int(value.split()[0])

								            ret = bits

								        except:

								            ret = None

								    elif key == 'channel':

								        if value == 'stereo':

								            ret = 2

								        elif value == 'mono':

								            ret = 1

								        else:

								            try:

								                channels = int(value)

								                ret = channels

								            except:

								                ret = None

								    elif key == 'compression':

								        ret = _parseCompression(value)

								    elif key == 'compression rate':

								        try:

								            ret = float(value.split('x')[0])

								        except:

								            ret = None

								    elif key == 'duration':

								        try:

								            ret = _parseDuration(value)

								        except:

								            ret = None

								    elif key == 'sample rate':

								        try:

								            ret = float(value.split()[0]) * 1000

								        except:

								            ret = None

								    elif key == 'frame rate':

								        try:

								            ret = float(value.split()[0])

								        except:

								            pass

								    elif key == 'image height' or key == 'image width':

								        pixels =  re.match("(?P<pixels>\d{1,4}) pixel", value)

								        if pixels:

								            ret = int(pixels.group('pixels'))

								        else:

								            ret = None

								    elif date_search:

								        try:

								            ret = datetime.datetime.strptime(date_search.group(), "%Y-%m-%d %H:%M:%S")

								        except:

								            ret = None

								    else:

								        #If it's something we don't know about...

								        ret = None


								    return ret


								def _parseDuration(value):

								    t = re.search(r"((?P<hour>\d+) hour(s|))? ?((?P<min>\d+) min)? ?((?P<sec>\d+) sec)? ?((?P<ms>\d+) ms)?", value)

								    if t:

								        hour = 0 if not t.group('hour') else int(t.group('hour'))

								        min = 0 if not t.group('min') else int(t.group('min'))

								        sec = 0 if not t.group('sec') else int(t.group('sec'))

								        ms = 0 if not t.group('ms') else int(t.group('ms'))

								        return datetime.timedelta(hours = hour, minutes = min, seconds = sec, milliseconds = ms)


								def _parseCompression(value):

								    codecs = {

								        'v_mpeg4/iso/avc': 'AVC',

								        'x264': 'AVC',

								        'divx': 'divx',

								        'xvid': 'xvid',

								        'v_ms/vfw/fourcc': 'vfw',

								        'vorbis': 'vorbis',

								        'xvid': 'xvid',

								        'mpeg layer 3': 'mp3',

								        'a_dts': 'DTS',

								        'a_aac': 'AAC',

								        'a_truehd': 'TRUEHD',

								        'microsoft mpeg': 'MPEG',

								        'ac3': 'AC3',

								        'wvc1': 'WVC1',

								        'pulse code modulation': 'PCM',

								        'pcm': 'PCM',

								        'windows media audio': 'WMA',

								        'windows media video': 'WMV',

								        's_text/ascii': 'ASCII',

								        's_text/utf8': 'UTF8',

								        's_text/ssa': 'SSA',

								        's_text/ass': 'ASS'

								    }

								    for codec in codecs:

								        if codec in value.lower():

								            return codecs[codec]


								def _parseBitRate(value):

								    try:

								        bitrate = float(value.split()[0])

								    except:

								        return None


								    if 'kbit' in value.lower():

								        multi = 1000

								    elif 'mbit' in value.lower():

								        multi = 1000 * 1000

								    else:

								        return None


								    return bitrate * multi


								print json.dumps(parseMetadata(getMetadata(sys.argv[1])))