diff --git a/libs/guessit/ISO-3166-1_utf8.txt b/libs/guessit/ISO-3166-1_utf8.txt
new file mode 100644
index 0000000..7022040
--- /dev/null
+++ b/libs/guessit/ISO-3166-1_utf8.txt
@@ -0,0 +1,249 @@
+Afghanistan|AF|AFG|004|ISO 3166-2:AF
+Åland Islands|AX|ALA|248|ISO 3166-2:AX
+Albania|AL|ALB|008|ISO 3166-2:AL
+Algeria|DZ|DZA|012|ISO 3166-2:DZ
+American Samoa|AS|ASM|016|ISO 3166-2:AS
+Andorra|AD|AND|020|ISO 3166-2:AD
+Angola|AO|AGO|024|ISO 3166-2:AO
+Anguilla|AI|AIA|660|ISO 3166-2:AI
+Antarctica|AQ|ATA|010|ISO 3166-2:AQ
+Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG
+Argentina|AR|ARG|032|ISO 3166-2:AR
+Armenia|AM|ARM|051|ISO 3166-2:AM
+Aruba|AW|ABW|533|ISO 3166-2:AW
+Australia|AU|AUS|036|ISO 3166-2:AU
+Austria|AT|AUT|040|ISO 3166-2:AT
+Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ
+Bahamas|BS|BHS|044|ISO 3166-2:BS
+Bahrain|BH|BHR|048|ISO 3166-2:BH
+Bangladesh|BD|BGD|050|ISO 3166-2:BD
+Barbados|BB|BRB|052|ISO 3166-2:BB
+Belarus|BY|BLR|112|ISO 3166-2:BY
+Belgium|BE|BEL|056|ISO 3166-2:BE
+Belize|BZ|BLZ|084|ISO 3166-2:BZ
+Benin|BJ|BEN|204|ISO 3166-2:BJ
+Bermuda|BM|BMU|060|ISO 3166-2:BM
+Bhutan|BT|BTN|064|ISO 3166-2:BT
+Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO
+Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ
+Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA
+Botswana|BW|BWA|072|ISO 3166-2:BW
+Bouvet Island|BV|BVT|074|ISO 3166-2:BV
+Brazil|BR|BRA|076|ISO 3166-2:BR
+British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO
+Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN
+Bulgaria|BG|BGR|100|ISO 3166-2:BG
+Burkina Faso|BF|BFA|854|ISO 3166-2:BF
+Burundi|BI|BDI|108|ISO 3166-2:BI
+Cambodia|KH|KHM|116|ISO 3166-2:KH
+Cameroon|CM|CMR|120|ISO 3166-2:CM
+Canada|CA|CAN|124|ISO 3166-2:CA
+Cape Verde|CV|CPV|132|ISO 3166-2:CV
+Cayman Islands|KY|CYM|136|ISO 3166-2:KY
+Central African Republic|CF|CAF|140|ISO 3166-2:CF
+Chad|TD|TCD|148|ISO 3166-2:TD
+Chile|CL|CHL|152|ISO 3166-2:CL
+China|CN|CHN|156|ISO 3166-2:CN
+Christmas Island|CX|CXR|162|ISO 3166-2:CX
+Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC
+Colombia|CO|COL|170|ISO 3166-2:CO
+Comoros|KM|COM|174|ISO 3166-2:KM
+Congo|CG|COG|178|ISO 3166-2:CG
+Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD
+Cook Islands|CK|COK|184|ISO 3166-2:CK
+Costa Rica|CR|CRI|188|ISO 3166-2:CR
+Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI
+Croatia|HR|HRV|191|ISO 3166-2:HR
+Cuba|CU|CUB|192|ISO 3166-2:CU
+Curaçao|CW|CUW|531|ISO 3166-2:CW
+Cyprus|CY|CYP|196|ISO 3166-2:CY
+Czech Republic|CZ|CZE|203|ISO 3166-2:CZ
+Denmark|DK|DNK|208|ISO 3166-2:DK
+Djibouti|DJ|DJI|262|ISO 3166-2:DJ
+Dominica|DM|DMA|212|ISO 3166-2:DM
+Dominican Republic|DO|DOM|214|ISO 3166-2:DO
+Ecuador|EC|ECU|218|ISO 3166-2:EC
+Egypt|EG|EGY|818|ISO 3166-2:EG
+El Salvador|SV|SLV|222|ISO 3166-2:SV
+Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ
+Eritrea|ER|ERI|232|ISO 3166-2:ER
+Estonia|EE|EST|233|ISO 3166-2:EE
+Ethiopia|ET|ETH|231|ISO 3166-2:ET
+Falkland Islands (Malvinas)|FK|FLK|238|ISO 3166-2:FK
+Faroe Islands|FO|FRO|234|ISO 3166-2:FO
+Fiji|FJ|FJI|242|ISO 3166-2:FJ
+Finland|FI|FIN|246|ISO 3166-2:FI
+France|FR|FRA|250|ISO 3166-2:FR
+French Guiana|GF|GUF|254|ISO 3166-2:GF
+French Polynesia|PF|PYF|258|ISO 3166-2:PF
+French Southern Territories|TF|ATF|260|ISO 3166-2:TF
+Gabon|GA|GAB|266|ISO 3166-2:GA
+Gambia|GM|GMB|270|ISO 3166-2:GM
+Georgia|GE|GEO|268|ISO 3166-2:GE
+Germany|DE|DEU|276|ISO 3166-2:DE
+Ghana|GH|GHA|288|ISO 3166-2:GH
+Gibraltar|GI|GIB|292|ISO 3166-2:GI
+Greece|GR|GRC|300|ISO 3166-2:GR
+Greenland|GL|GRL|304|ISO 3166-2:GL
+Grenada|GD|GRD|308|ISO 3166-2:GD
+Guadeloupe|GP|GLP|312|ISO 3166-2:GP
+Guam|GU|GUM|316|ISO 3166-2:GU
+Guatemala|GT|GTM|320|ISO 3166-2:GT
+Guernsey|GG|GGY|831|ISO 3166-2:GG
+Guinea|GN|GIN|324|ISO 3166-2:GN
+Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW
+Guyana|GY|GUY|328|ISO 3166-2:GY
+Haiti|HT|HTI|332|ISO 3166-2:HT
+Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM
+Holy See (Vatican City State)|VA|VAT|336|ISO 3166-2:VA
+Honduras|HN|HND|340|ISO 3166-2:HN
+Hong Kong|HK|HKG|344|ISO 3166-2:HK
+Hungary|HU|HUN|348|ISO 3166-2:HU
+Iceland|IS|ISL|352|ISO 3166-2:IS
+India|IN|IND|356|ISO 3166-2:IN
+Indonesia|ID|IDN|360|ISO 3166-2:ID
+Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR
+Iraq|IQ|IRQ|368|ISO 3166-2:IQ
+Ireland|IE|IRL|372|ISO 3166-2:IE
+Isle of Man|IM|IMN|833|ISO 3166-2:IM
+Israel|IL|ISR|376|ISO 3166-2:IL
+Italy|IT|ITA|380|ISO 3166-2:IT
+Jamaica|JM|JAM|388|ISO 3166-2:JM
+Japan|JP|JPN|392|ISO 3166-2:JP
+Jersey|JE|JEY|832|ISO 3166-2:JE
+Jordan|JO|JOR|400|ISO 3166-2:JO
+Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ
+Kenya|KE|KEN|404|ISO 3166-2:KE
+Kiribati|KI|KIR|296|ISO 3166-2:KI
+Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP
+Korea, Republic of|KR|KOR|410|ISO 3166-2:KR
+Kuwait|KW|KWT|414|ISO 3166-2:KW
+Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG
+Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA
+Latvia|LV|LVA|428|ISO 3166-2:LV
+Lebanon|LB|LBN|422|ISO 3166-2:LB
+Lesotho|LS|LSO|426|ISO 3166-2:LS
+Liberia|LR|LBR|430|ISO 3166-2:LR
+Libya|LY|LBY|434|ISO 3166-2:LY
+Liechtenstein|LI|LIE|438|ISO 3166-2:LI
+Lithuania|LT|LTU|440|ISO 3166-2:LT
+Luxembourg|LU|LUX|442|ISO 3166-2:LU
+Macao|MO|MAC|446|ISO 3166-2:MO
+Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK
+Madagascar|MG|MDG|450|ISO 3166-2:MG
+Malawi|MW|MWI|454|ISO 3166-2:MW
+Malaysia|MY|MYS|458|ISO 3166-2:MY
+Maldives|MV|MDV|462|ISO 3166-2:MV
+Mali|ML|MLI|466|ISO 3166-2:ML
+Malta|MT|MLT|470|ISO 3166-2:MT
+Marshall Islands|MH|MHL|584|ISO 3166-2:MH
+Martinique|MQ|MTQ|474|ISO 3166-2:MQ
+Mauritania|MR|MRT|478|ISO 3166-2:MR
+Mauritius|MU|MUS|480|ISO 3166-2:MU
+Mayotte|YT|MYT|175|ISO 3166-2:YT
+Mexico|MX|MEX|484|ISO 3166-2:MX
+Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM
+Moldova, Republic of|MD|MDA|498|ISO 3166-2:MD
+Monaco|MC|MCO|492|ISO 3166-2:MC
+Mongolia|MN|MNG|496|ISO 3166-2:MN
+Montenegro|ME|MNE|499|ISO 3166-2:ME
+Montserrat|MS|MSR|500|ISO 3166-2:MS
+Morocco|MA|MAR|504|ISO 3166-2:MA
+Mozambique|MZ|MOZ|508|ISO 3166-2:MZ
+Myanmar|MM|MMR|104|ISO 3166-2:MM
+Namibia|NA|NAM|516|ISO 3166-2:NA
+Nauru|NR|NRU|520|ISO 3166-2:NR
+Nepal|NP|NPL|524|ISO 3166-2:NP
+Netherlands|NL|NLD|528|ISO 3166-2:NL
+New Caledonia|NC|NCL|540|ISO 3166-2:NC
+New Zealand|NZ|NZL|554|ISO 3166-2:NZ
+Nicaragua|NI|NIC|558|ISO 3166-2:NI
+Niger|NE|NER|562|ISO 3166-2:NE
+Nigeria|NG|NGA|566|ISO 3166-2:NG
+Niue|NU|NIU|570|ISO 3166-2:NU
+Norfolk Island|NF|NFK|574|ISO 3166-2:NF
+Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP
+Norway|NO|NOR|578|ISO 3166-2:NO
+Oman|OM|OMN|512|ISO 3166-2:OM
+Pakistan|PK|PAK|586|ISO 3166-2:PK
+Palau|PW|PLW|585|ISO 3166-2:PW
+Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS
+Panama|PA|PAN|591|ISO 3166-2:PA
+Papua New Guinea|PG|PNG|598|ISO 3166-2:PG
+Paraguay|PY|PRY|600|ISO 3166-2:PY
+Peru|PE|PER|604|ISO 3166-2:PE
+Philippines|PH|PHL|608|ISO 3166-2:PH
+Pitcairn|PN|PCN|612|ISO 3166-2:PN
+Poland|PL|POL|616|ISO 3166-2:PL
+Portugal|PT|PRT|620|ISO 3166-2:PT
+Puerto Rico|PR|PRI|630|ISO 3166-2:PR
+Qatar|QA|QAT|634|ISO 3166-2:QA
+Réunion|RE|REU|638|ISO 3166-2:RE
+Romania|RO|ROU|642|ISO 3166-2:RO
+Russian Federation|RU|RUS|643|ISO 3166-2:RU
+Rwanda|RW|RWA|646|ISO 3166-2:RW
+Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL
+Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH
+Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN
+Saint Lucia|LC|LCA|662|ISO 3166-2:LC
+Saint Martin (French part)|MF|MAF|663|ISO 3166-2:MF
+Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM
+Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC
+Samoa|WS|WSM|882|ISO 3166-2:WS
+San Marino|SM|SMR|674|ISO 3166-2:SM
+Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST
+Saudi Arabia|SA|SAU|682|ISO 3166-2:SA
+Senegal|SN|SEN|686|ISO 3166-2:SN
+Serbia|RS|SRB|688|ISO 3166-2:RS
+Seychelles|SC|SYC|690|ISO 3166-2:SC
+Sierra Leone|SL|SLE|694|ISO 3166-2:SL
+Singapore|SG|SGP|702|ISO 3166-2:SG
+Sint Maarten (Dutch part)|SX|SXM|534|ISO 3166-2:SX
+Slovakia|SK|SVK|703|ISO 3166-2:SK
+Slovenia|SI|SVN|705|ISO 3166-2:SI
+Solomon Islands|SB|SLB|090|ISO 3166-2:SB
+Somalia|SO|SOM|706|ISO 3166-2:SO
+South Africa|ZA|ZAF|710|ISO 3166-2:ZA
+South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS
+South Sudan|SS|SSD|728|ISO 3166-2:SS
+Spain|ES|ESP|724|ISO 3166-2:ES
+Sri Lanka|LK|LKA|144|ISO 3166-2:LK
+Sudan|SD|SDN|729|ISO 3166-2:SD
+Suriname|SR|SUR|740|ISO 3166-2:SR
+Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ
+Swaziland|SZ|SWZ|748|ISO 3166-2:SZ
+Sweden|SE|SWE|752|ISO 3166-2:SE
+Switzerland|CH|CHE|756|ISO 3166-2:CH
+Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY
+Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW
+Tajikistan|TJ|TJK|762|ISO 3166-2:TJ
+Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ
+Thailand|TH|THA|764|ISO 3166-2:TH
+Timor-Leste|TL|TLS|626|ISO 3166-2:TL
+Togo|TG|TGO|768|ISO 3166-2:TG
+Tokelau|TK|TKL|772|ISO 3166-2:TK
+Tonga|TO|TON|776|ISO 3166-2:TO
+Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT
+Tunisia|TN|TUN|788|ISO 3166-2:TN
+Turkey|TR|TUR|792|ISO 3166-2:TR
+Turkmenistan|TM|TKM|795|ISO 3166-2:TM
+Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC
+Tuvalu|TV|TUV|798|ISO 3166-2:TV
+Uganda|UG|UGA|800|ISO 3166-2:UG
+Ukraine|UA|UKR|804|ISO 3166-2:UA
+United Arab Emirates|AE|ARE|784|ISO 3166-2:AE
+United Kingdom|GB|GBR|826|ISO 3166-2:GB
+United States|US|USA|840|ISO 3166-2:US
+United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM
+Uruguay|UY|URY|858|ISO 3166-2:UY
+Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ
+Vanuatu|VU|VUT|548|ISO 3166-2:VU
+Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE
+Viet Nam|VN|VNM|704|ISO 3166-2:VN
+Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG
+Virgin Islands, U.S.|VI|VIR|850|ISO 3166-2:VI
+Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF
+Western Sahara|EH|ESH|732|ISO 3166-2:EH
+Yemen|YE|YEM|887|ISO 3166-2:YE
+Zambia|ZM|ZMB|894|ISO 3166-2:ZM
+Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW
diff --git a/libs/guessit/__init__.py b/libs/guessit/__init__.py
index a86f71b..9c7c9d0 100644
--- a/libs/guessit/__init__.py
+++ b/libs/guessit/__init__.py
@@ -18,7 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-__version__ = '0.3.1'
+__version__ = '0.4'
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info']
@@ -29,7 +29,7 @@ from guessit.language import Language
from guessit.matcher import IterativeMatcher
import logging
-log = logging.getLogger("guessit")
+log = logging.getLogger(__name__)
class NullHandler(logging.Handler):
diff --git a/libs/guessit/country.py b/libs/guessit/country.py
new file mode 100644
index 0000000..f529728
--- /dev/null
+++ b/libs/guessit/country.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2012 Nicolas Wack
+#
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GuessIt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Lesser GNU General Public License for more details.
+#
+# You should have received a copy of the Lesser GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+from __future__ import unicode_literals
+from guessit import fileutils
+import logging
+
+log = logging.getLogger(__name__)
+
+
+# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
+#
+# Description of the fields:
+# "An English name, an alpha-2 code (when given),
+# an alpha-3 code (when given), a numeric code, and an ISO 3166-2 code
+# are all separated by pipe (|) characters."
+_iso3166_contents = fileutils.load_file_in_same_dir(__file__,
+ 'ISO-3166-1_utf8.txt').decode('utf-8')
+
+country_matrix = [ l.strip().split('|')
+ for l in _iso3166_contents.strip().split('\n') ]
+
+country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
+ [ 'Latin America', '', 'lat', '', '' ]
+ ]
+
+country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
+country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
+country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix))
+
+# add here exceptions / non ISO representations
+# Note: remember to put those exceptions in lower-case, they won't work otherwise
+country_to_alpha3.update({ 'latinoamérica': 'lat',
+ 'brazilian': 'bra',
+ 'españa': 'esp',
+ 'uk': 'gbr'
+ })
+
+country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
+country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
+
+
+
+class Country(object):
+ """This class represents a country.
+
+ You can initialize it with pretty much anything, as it knows conversion
+ from ISO-3166 2-letter and 3-letter codes, and an English name.
+ """
+
+ def __init__(self, country, strict=False):
+ self.alpha3 = country_to_alpha3.get(country.lower())
+
+ if self.alpha3 is None and strict:
+ msg = 'The given string "%s" could not be identified as a country'
+ raise ValueError(msg % country)
+
+ if self.alpha3 is None:
+ self.alpha3 = 'unk'
+
+
+ @property
+ def alpha2(self):
+ return country_alpha3_to_alpha2[self.alpha3]
+
+ @property
+ def english_name(self):
+ return country_alpha3_to_en_name[self.alpha3]
+
+ def __hash__(self):
+ return hash(self.alpha3)
+
+ def __eq__(self, other):
+ if isinstance(other, Country):
+ return self.alpha3 == other.alpha3
+
+ if isinstance(other, basestring):
+ try:
+ return self == Country(other)
+ except ValueError:
+ return False
+
+ return False
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __unicode__(self):
+ return self.english_name
+
+ def __str__(self):
+ return unicode(self).encode('utf-8')
+
+ def __repr__(self):
+ return 'Country(%s)' % self.english_name
+
diff --git a/libs/guessit/guess.py b/libs/guessit/guess.py
index 9950a12..e25ca1f 100644
--- a/libs/guessit/guess.py
+++ b/libs/guessit/guess.py
@@ -22,7 +22,7 @@ import json
import datetime
import logging
-log = logging.getLogger("guessit.guess")
+log = logging.getLogger(__name__)
class Guess(dict):
diff --git a/libs/guessit/language.py b/libs/guessit/language.py
index 777d0e2..b043346 100644
--- a/libs/guessit/language.py
+++ b/libs/guessit/language.py
@@ -18,10 +18,18 @@
# along with this program. If not, see .
#
+from __future__ import unicode_literals
from guessit import fileutils
+from guessit.country import Country
+import re
import logging
-log = logging.getLogger('guessit.language')
+__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
+ 'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'search_language' ]
+
+
+log = logging.getLogger(__name__)
+
# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
#
@@ -30,9 +38,23 @@ log = logging.getLogger('guessit.language')
# an alpha-2 code (when given), an English name, and a French name of a language
# are all separated by pipe (|) characters."
_iso639_contents = fileutils.load_file_in_same_dir(__file__,
- 'ISO-639-2_utf-8.txt')
-language_matrix = [ l.strip().decode('utf-8').split('|')
- for l in _iso639_contents.split('\n') ]
+ 'ISO-639-2_utf-8.txt').decode('utf-8')
+
+# drop the BOM from the beginning of the file
+_iso639_contents = _iso639_contents[1:]
+
+language_matrix = [ l.strip().split('|')
+ for l in _iso639_contents.strip().split('\n') ]
+
+language_matrix += [ [ 'unk', '', 'un', 'Unknown', 'inconnu' ] ]
+
+
+# remove unused languages that shadow other common ones with a non-official form
+for lang in language_matrix:
+ if (lang[2] == 'se' or # Northern Sami shadows Swedish
+ lang[2] == 'br'): # Breton shadows Brazilian
+ language_matrix.remove(lang)
+
lng3 = frozenset(l[0] for l in language_matrix if l[0])
lng3term = frozenset(l[1] for l in language_matrix if l[1])
@@ -63,54 +85,126 @@ lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
for l in language_matrix if l[4]
for fr_name in l[4].split('; '))
+# contains a list of exceptions: strings that should be parsed as a language
+# but which are not in an ISO form
+lng_exceptions = { 'gr': ('gre', None),
+ 'greek': ('gre', None),
+ 'esp': ('spa', None),
+ 'español': ('spa', None),
+ 'se': ('swe', None),
+ 'po': ('pt', 'br'),
+ 'pob': ('pt', 'br'),
+ 'br': ('pt', 'br'),
+ 'brazilian': ('pt', 'br'),
+ 'català': ('cat', None),
+ 'cz': ('cze', None),
+ 'ua': ('ukr', None),
+ 'cn': ('chi', None),
+ 'chs': ('chi', None),
+ 'jp': ('jpn', None)
+ }
+
+
+def is_iso_language(language):
+ return language.lower() in lng_all_names
def is_language(language):
- return language.lower() in lng_all_names
+ return is_iso_language(language) or language in lng_exceptions
+
+def lang_set(languages, strict=False):
+ """Return a set of guessit.Language created from their given string
+ representation.
+
+ if strict is True, then this will raise an exception if any language
+ could not be identified.
+ """
+ return set(Language(l, strict=strict) for l in languages)
class Language(object):
"""This class represents a human language.
- You can initialize it with pretty much everything, as it knows conversion
+ You can initialize it with pretty much anything, as it knows conversion
from ISO-639 2-letter and 3-letter codes, English and French names.
+ You can also distinguish languages for specific countries, such as
+ Portuguese and Brazilian Portuguese.
+
>>> Language('fr')
Language(French)
- >>> Language('eng').french_name()
+ >>> Language('eng').french_name
u'anglais'
+
+ >>> Language('pt(br)').country.english_name
+ u'Brazil'
+
+ >>> Language('Español (Latinoamérica)').country.english_name
+ u'Latin America'
+
+ >>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
+ True
+
+ >>> Language('zz', strict=False).english_name
+ u'Unknown'
"""
- def __init__(self, language):
- lang = None
- language = language.lower()
+
+ _with_country_regexp = re.compile('(.*)\((.*)\)')
+
+ def __init__(self, language, country=None, strict=False):
+ language = language.strip().lower()
+ if isinstance(language, str):
+ language = language.decode('utf-8')
+ with_country = Language._with_country_regexp.match(language)
+ if with_country:
+ self.lang = Language(with_country.group(1)).lang
+ self.country = Country(with_country.group(2))
+ return
+
+ self.lang = None
+ self.country = Country(country) if country else None
+
if len(language) == 2:
- lang = lng2_to_lng3.get(language)
+ self.lang = lng2_to_lng3.get(language)
elif len(language) == 3:
- lang = (language
- if language in lng3
- else lng3term_to_lng3.get(language))
+ self.lang = (language
+ if language in lng3
+ else lng3term_to_lng3.get(language))
else:
- lang = (lng_en_name_to_lng3.get(language) or
- lng_fr_name_to_lng3.get(language))
+ self.lang = (lng_en_name_to_lng3.get(language) or
+ lng_fr_name_to_lng3.get(language))
- if lang is None:
- msg = 'The given string "%s" could not be identified as a language'
- raise ValueError(msg % language)
+ if self.lang is None and language in lng_exceptions:
+ lang, country = lng_exceptions[language]
+ self.lang = Language(lang).alpha3
+ self.country = Country(country) if country else None
- self.lang = lang
+ msg = 'The given string "%s" could not be identified as a language' % language
- def lng2(self):
+ if self.lang is None and strict:
+ raise ValueError(msg)
+
+ if self.lang is None:
+ log.debug(msg)
+ self.lang = 'unk'
+
+ @property
+ def alpha2(self):
return lng3_to_lng2[self.lang]
- def lng3(self):
+ @property
+ def alpha3(self):
return self.lang
- def lng3term(self):
+ @property
+ def alpha3term(self):
return lng3_to_lng3term[self.lang]
+ @property
def english_name(self):
return lng3_to_lng_en_name[self.lang]
+ @property
def french_name(self):
return lng3_to_lng_fr_name[self.lang]
@@ -132,15 +226,27 @@ class Language(object):
def __ne__(self, other):
return not self == other
+ def __nonzero__(self):
+ return self.lang != 'unk'
+
def __unicode__(self):
- return lng3_to_lng_en_name[self.lang]
+ if self.country:
+ return '%s(%s)' % (self.english_name, self.country.alpha2)
+ else:
+ return self.english_name
def __str__(self):
return unicode(self).encode('utf-8')
def __repr__(self):
- return 'Language(%s)' % self
+ if self.country:
+ return 'Language(%s, country=%s)' % (self.english_name, self.country)
+ else:
+ return 'Language(%s)' % self.english_name
+
+ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([Language('unk')])
+ALL_LANGUAGES_NAMES = lng_all_names
def search_language(string, lang_filter=None):
"""Looks for language patterns, and if found return the language object,
@@ -177,7 +283,7 @@ def search_language(string, lang_filter=None):
sep = r'[](){} \._-+'
if lang_filter:
- lang_filter = set(Language(l) for l in lang_filter)
+ lang_filter = lang_set(lang_filter)
slow = ' %s ' % string.lower()
confidence = 1.0 # for all of them
diff --git a/libs/guessit/matcher.py b/libs/guessit/matcher.py
index cac172d..b0a5040 100644
--- a/libs/guessit/matcher.py
+++ b/libs/guessit/matcher.py
@@ -25,7 +25,7 @@ from guessit.guess import (merge_similar_guesses, merge_all,
import copy
import logging
-log = logging.getLogger("guessit.matcher")
+log = logging.getLogger(__name__)
class IterativeMatcher(object):
@@ -105,7 +105,7 @@ class IterativeMatcher(object):
'guess_release_group', 'guess_properties',
'guess_weak_episodes_rexps', 'guess_language']
else:
- strategy = ['guess_date', 'guess_year', 'guess_video_rexps',
+ strategy = ['guess_date', 'guess_video_rexps',
'guess_website', 'guess_release_group',
'guess_properties', 'guess_language']
@@ -125,6 +125,7 @@ class IterativeMatcher(object):
if mtree.guess['type'] in ('episode', 'episodesubtitle'):
apply_transfo('guess_episode_info_from_position')
else:
+ apply_transfo('guess_year')
apply_transfo('guess_movie_title_from_position')
# 6- perform some post-processing steps
diff --git a/libs/guessit/matchtree.py b/libs/guessit/matchtree.py
index 634cbf7..466e0bb 100644
--- a/libs/guessit/matchtree.py
+++ b/libs/guessit/matchtree.py
@@ -23,7 +23,7 @@ from guessit.textutils import clean_string, str_fill, to_utf8
from guessit.patterns import group_delimiters
import logging
-log = logging.getLogger("guessit.matchtree")
+log = logging.getLogger(__name__)
class BaseMatchTree(object):
diff --git a/libs/guessit/patterns.py b/libs/guessit/patterns.py
index 4125fb7..4223585 100755
--- a/libs/guessit/patterns.py
+++ b/libs/guessit/patterns.py
@@ -22,8 +22,9 @@
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa', 'txt' ]
-video_exts = [ 'avi', 'mkv', 'mpg', 'mp4', 'm4v', 'mov', 'ogg', 'ogm', 'ogv',
- 'wmv', 'divx' ]
+video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
+ 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
+ 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
group_delimiters = [ '()', '[]', '{}' ]
@@ -62,6 +63,8 @@ weak_episode_rexps = [ # ... 213 or 0106 ...
# ... 2x13 ...
(sep + r'[^0-9](?P[0-9]{1,2})\.(?P[0-9]{2})[^0-9]' + sep, (1, -1)),
+ # ... e13 ... for a mini-series without a season number
+ (r'e(?P[0-9]{1,4})[^0-9]', (0, -1)),
]
non_episode_title = [ 'extras', 'rip' ]
diff --git a/libs/guessit/transfo/__init__.py b/libs/guessit/transfo/__init__.py
index eb72beb..1bdd09b 100644
--- a/libs/guessit/transfo/__init__.py
+++ b/libs/guessit/transfo/__init__.py
@@ -23,7 +23,7 @@ from guessit.patterns import canonical_form
from guessit.textutils import clean_string
import logging
-log = logging.getLogger('guessit.transfo')
+log = logging.getLogger(__name__)
def found_property(node, name, confidence):
diff --git a/libs/guessit/transfo/guess_bonus_features.py b/libs/guessit/transfo/guess_bonus_features.py
index dcb90b3..73fc7b4 100644
--- a/libs/guessit/transfo/guess_bonus_features.py
+++ b/libs/guessit/transfo/guess_bonus_features.py
@@ -21,7 +21,7 @@
from guessit.transfo import found_property
import logging
-log = logging.getLogger("guessit.transfo.guess_bonus_features")
+log = logging.getLogger(__name__)
def process(mtree):
diff --git a/libs/guessit/transfo/guess_date.py b/libs/guessit/transfo/guess_date.py
index c72d66a..ded8094 100644
--- a/libs/guessit/transfo/guess_date.py
+++ b/libs/guessit/transfo/guess_date.py
@@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.date import search_date
import logging
-log = logging.getLogger("guessit.transfo.guess_date")
+log = logging.getLogger(__name__)
def guess_date(string):
diff --git a/libs/guessit/transfo/guess_episode_info_from_position.py b/libs/guessit/transfo/guess_episode_info_from_position.py
index fe1a752..7b4f43f 100644
--- a/libs/guessit/transfo/guess_episode_info_from_position.py
+++ b/libs/guessit/transfo/guess_episode_info_from_position.py
@@ -22,7 +22,7 @@ from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging
-log = logging.getLogger("guessit.transfo.guess_episode_info_from_position")
+log = logging.getLogger(__name__)
def match_from_epnum_position(mtree, node):
@@ -112,6 +112,9 @@ def process(mtree):
if len(title_candidates) >= 2:
found_property(title_candidates[0], 'series', 0.4)
found_property(title_candidates[1], 'title', 0.4)
+ elif len(title_candidates) == 1:
+ # but if there's only one candidate, it's probably the series name
+ found_property(title_candidates[0], 'series', 0.4)
# if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name
diff --git a/libs/guessit/transfo/guess_episodes_rexps.py b/libs/guessit/transfo/guess_episodes_rexps.py
index 46dbc59..dfaa944 100644
--- a/libs/guessit/transfo/guess_episodes_rexps.py
+++ b/libs/guessit/transfo/guess_episodes_rexps.py
@@ -24,7 +24,7 @@ from guessit.patterns import episode_rexps
import re
import logging
-log = logging.getLogger("guessit.transfo.guess_episodes_rexps")
+log = logging.getLogger(__name__)
def guess_episodes_rexps(string):
diff --git a/libs/guessit/transfo/guess_filetype.py b/libs/guessit/transfo/guess_filetype.py
index 32bdc13..bf0a80a 100644
--- a/libs/guessit/transfo/guess_filetype.py
+++ b/libs/guessit/transfo/guess_filetype.py
@@ -26,7 +26,7 @@ import re
import mimetypes
import logging
-log = logging.getLogger("guessit.transfo.guess_filetype")
+log = logging.getLogger(__name__)
def guess_filetype(filename, filetype):
diff --git a/libs/guessit/transfo/guess_language.py b/libs/guessit/transfo/guess_language.py
index 62f47d8..aa1431b 100644
--- a/libs/guessit/transfo/guess_language.py
+++ b/libs/guessit/transfo/guess_language.py
@@ -24,7 +24,7 @@ from guessit.language import search_language
from guessit.textutils import clean_string
import logging
-log = logging.getLogger("guessit.transfo.guess_language")
+log = logging.getLogger(__name__)
def guess_language(string):
diff --git a/libs/guessit/transfo/guess_movie_title_from_position.py b/libs/guessit/transfo/guess_movie_title_from_position.py
index dea56d6..55289c8 100644
--- a/libs/guessit/transfo/guess_movie_title_from_position.py
+++ b/libs/guessit/transfo/guess_movie_title_from_position.py
@@ -21,7 +21,7 @@
from guessit import Guess
import logging
-log = logging.getLogger("guessit.transfo.guess_movie_title_from_position")
+log = logging.getLogger(__name__)
def process(mtree):
diff --git a/libs/guessit/transfo/guess_properties.py b/libs/guessit/transfo/guess_properties.py
index 3822d22..02d0cad 100644
--- a/libs/guessit/transfo/guess_properties.py
+++ b/libs/guessit/transfo/guess_properties.py
@@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import logging
-log = logging.getLogger("guessit.transfo.guess_properties")
+log = logging.getLogger(__name__)
def guess_properties(string):
diff --git a/libs/guessit/transfo/guess_release_group.py b/libs/guessit/transfo/guess_release_group.py
index 9ec609d..54a7148 100644
--- a/libs/guessit/transfo/guess_release_group.py
+++ b/libs/guessit/transfo/guess_release_group.py
@@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
import re
import logging
-log = logging.getLogger("guessit.transfo.guess_release_group")
+log = logging.getLogger(__name__)
def guess_release_group(string):
diff --git a/libs/guessit/transfo/guess_video_rexps.py b/libs/guessit/transfo/guess_video_rexps.py
index 36723c8..697a6af 100644
--- a/libs/guessit/transfo/guess_video_rexps.py
+++ b/libs/guessit/transfo/guess_video_rexps.py
@@ -24,7 +24,7 @@ from guessit.patterns import video_rexps, sep
import re
import logging
-log = logging.getLogger("guessit.transfo.guess_video_rexps")
+log = logging.getLogger(__name__)
def guess_video_rexps(string):
diff --git a/libs/guessit/transfo/guess_weak_episodes_rexps.py b/libs/guessit/transfo/guess_weak_episodes_rexps.py
index 8fffe17..57c9f44 100644
--- a/libs/guessit/transfo/guess_weak_episodes_rexps.py
+++ b/libs/guessit/transfo/guess_weak_episodes_rexps.py
@@ -24,7 +24,7 @@ from guessit.patterns import weak_episode_rexps
import re
import logging
-log = logging.getLogger("guessit.transfo.guess_weak_episodes_rexps")
+log = logging.getLogger(__name__)
def guess_weak_episodes_rexps(string, node):
diff --git a/libs/guessit/transfo/guess_website.py b/libs/guessit/transfo/guess_website.py
index a169f97..638f7d2 100644
--- a/libs/guessit/transfo/guess_website.py
+++ b/libs/guessit/transfo/guess_website.py
@@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.patterns import websites
import logging
-log = logging.getLogger("guessit.transfo.guess_website")
+log = logging.getLogger(__name__)
def guess_website(string):
diff --git a/libs/guessit/transfo/guess_year.py b/libs/guessit/transfo/guess_year.py
index 7a47ecf..7a90111 100644
--- a/libs/guessit/transfo/guess_year.py
+++ b/libs/guessit/transfo/guess_year.py
@@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.date import search_year
import logging
-log = logging.getLogger("guessit.transfo.guess_year")
+log = logging.getLogger(__name__)
def guess_year(string):
diff --git a/libs/guessit/transfo/post_process.py b/libs/guessit/transfo/post_process.py
index 0b5a4df..f08bbb2 100644
--- a/libs/guessit/transfo/post_process.py
+++ b/libs/guessit/transfo/post_process.py
@@ -21,7 +21,7 @@
from guessit.patterns import subtitle_exts
import logging
-log = logging.getLogger("guessit.transfo.post_process")
+log = logging.getLogger(__name__)
def process(mtree):
diff --git a/libs/guessit/transfo/split_explicit_groups.py b/libs/guessit/transfo/split_explicit_groups.py
index 797a886..f99ff19 100644
--- a/libs/guessit/transfo/split_explicit_groups.py
+++ b/libs/guessit/transfo/split_explicit_groups.py
@@ -22,7 +22,7 @@ from guessit.textutils import find_first_level_groups
from guessit.patterns import group_delimiters
import logging
-log = logging.getLogger("guessit.transfo.split_explicit_groups")
+log = logging.getLogger(__name__)
def process(mtree):
diff --git a/libs/guessit/transfo/split_on_dash.py b/libs/guessit/transfo/split_on_dash.py
index fc10c49..0f2c34b 100644
--- a/libs/guessit/transfo/split_on_dash.py
+++ b/libs/guessit/transfo/split_on_dash.py
@@ -22,7 +22,7 @@ from guessit.patterns import sep
import re
import logging
-log = logging.getLogger("guessit.transfo.split_on_dash")
+log = logging.getLogger(__name__)
def process(mtree):
diff --git a/libs/guessit/transfo/split_path_components.py b/libs/guessit/transfo/split_path_components.py
index 0f8d1a5..9f7ec9b 100644
--- a/libs/guessit/transfo/split_path_components.py
+++ b/libs/guessit/transfo/split_path_components.py
@@ -22,7 +22,7 @@ from guessit import fileutils
import os.path
import logging
-log = logging.getLogger("guessit.transfo.split_path_components")
+log = logging.getLogger(__name__)
def process(mtree):