Browse Source

Update guessit

tags/build/2.0.0.pre1
Ruud 13 years ago
parent
commit
f934ef2ede
  1. 249
      libs/guessit/ISO-3166-1_utf8.txt
  2. 4
      libs/guessit/__init__.py
  3. 113
      libs/guessit/country.py
  4. 2
      libs/guessit/guess.py
  5. 158
      libs/guessit/language.py
  6. 5
      libs/guessit/matcher.py
  7. 2
      libs/guessit/matchtree.py
  8. 7
      libs/guessit/patterns.py
  9. 2
      libs/guessit/transfo/__init__.py
  10. 2
      libs/guessit/transfo/guess_bonus_features.py
  11. 2
      libs/guessit/transfo/guess_date.py
  12. 5
      libs/guessit/transfo/guess_episode_info_from_position.py
  13. 2
      libs/guessit/transfo/guess_episodes_rexps.py
  14. 2
      libs/guessit/transfo/guess_filetype.py
  15. 2
      libs/guessit/transfo/guess_language.py
  16. 2
      libs/guessit/transfo/guess_movie_title_from_position.py
  17. 2
      libs/guessit/transfo/guess_properties.py
  18. 2
      libs/guessit/transfo/guess_release_group.py
  19. 2
      libs/guessit/transfo/guess_video_rexps.py
  20. 2
      libs/guessit/transfo/guess_weak_episodes_rexps.py
  21. 2
      libs/guessit/transfo/guess_website.py
  22. 2
      libs/guessit/transfo/guess_year.py
  23. 2
      libs/guessit/transfo/post_process.py
  24. 2
      libs/guessit/transfo/split_explicit_groups.py
  25. 2
      libs/guessit/transfo/split_on_dash.py
  26. 2
      libs/guessit/transfo/split_path_components.py

249
libs/guessit/ISO-3166-1_utf8.txt

@ -0,0 +1,249 @@
Afghanistan|AF|AFG|004|ISO 3166-2:AF
Åland Islands|AX|ALA|248|ISO 3166-2:AX
Albania|AL|ALB|008|ISO 3166-2:AL
Algeria|DZ|DZA|012|ISO 3166-2:DZ
American Samoa|AS|ASM|016|ISO 3166-2:AS
Andorra|AD|AND|020|ISO 3166-2:AD
Angola|AO|AGO|024|ISO 3166-2:AO
Anguilla|AI|AIA|660|ISO 3166-2:AI
Antarctica|AQ|ATA|010|ISO 3166-2:AQ
Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG
Argentina|AR|ARG|032|ISO 3166-2:AR
Armenia|AM|ARM|051|ISO 3166-2:AM
Aruba|AW|ABW|533|ISO 3166-2:AW
Australia|AU|AUS|036|ISO 3166-2:AU
Austria|AT|AUT|040|ISO 3166-2:AT
Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ
Bahamas|BS|BHS|044|ISO 3166-2:BS
Bahrain|BH|BHR|048|ISO 3166-2:BH
Bangladesh|BD|BGD|050|ISO 3166-2:BD
Barbados|BB|BRB|052|ISO 3166-2:BB
Belarus|BY|BLR|112|ISO 3166-2:BY
Belgium|BE|BEL|056|ISO 3166-2:BE
Belize|BZ|BLZ|084|ISO 3166-2:BZ
Benin|BJ|BEN|204|ISO 3166-2:BJ
Bermuda|BM|BMU|060|ISO 3166-2:BM
Bhutan|BT|BTN|064|ISO 3166-2:BT
Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO
Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ
Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA
Botswana|BW|BWA|072|ISO 3166-2:BW
Bouvet Island|BV|BVT|074|ISO 3166-2:BV
Brazil|BR|BRA|076|ISO 3166-2:BR
British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO
Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN
Bulgaria|BG|BGR|100|ISO 3166-2:BG
Burkina Faso|BF|BFA|854|ISO 3166-2:BF
Burundi|BI|BDI|108|ISO 3166-2:BI
Cambodia|KH|KHM|116|ISO 3166-2:KH
Cameroon|CM|CMR|120|ISO 3166-2:CM
Canada|CA|CAN|124|ISO 3166-2:CA
Cape Verde|CV|CPV|132|ISO 3166-2:CV
Cayman Islands|KY|CYM|136|ISO 3166-2:KY
Central African Republic|CF|CAF|140|ISO 3166-2:CF
Chad|TD|TCD|148|ISO 3166-2:TD
Chile|CL|CHL|152|ISO 3166-2:CL
China|CN|CHN|156|ISO 3166-2:CN
Christmas Island|CX|CXR|162|ISO 3166-2:CX
Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC
Colombia|CO|COL|170|ISO 3166-2:CO
Comoros|KM|COM|174|ISO 3166-2:KM
Congo|CG|COG|178|ISO 3166-2:CG
Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD
Cook Islands|CK|COK|184|ISO 3166-2:CK
Costa Rica|CR|CRI|188|ISO 3166-2:CR
Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI
Croatia|HR|HRV|191|ISO 3166-2:HR
Cuba|CU|CUB|192|ISO 3166-2:CU
Curaçao|CW|CUW|531|ISO 3166-2:CW
Cyprus|CY|CYP|196|ISO 3166-2:CY
Czech Republic|CZ|CZE|203|ISO 3166-2:CZ
Denmark|DK|DNK|208|ISO 3166-2:DK
Djibouti|DJ|DJI|262|ISO 3166-2:DJ
Dominica|DM|DMA|212|ISO 3166-2:DM
Dominican Republic|DO|DOM|214|ISO 3166-2:DO
Ecuador|EC|ECU|218|ISO 3166-2:EC
Egypt|EG|EGY|818|ISO 3166-2:EG
El Salvador|SV|SLV|222|ISO 3166-2:SV
Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ
Eritrea|ER|ERI|232|ISO 3166-2:ER
Estonia|EE|EST|233|ISO 3166-2:EE
Ethiopia|ET|ETH|231|ISO 3166-2:ET
Falkland Islands (Malvinas)|FK|FLK|238|ISO 3166-2:FK
Faroe Islands|FO|FRO|234|ISO 3166-2:FO
Fiji|FJ|FJI|242|ISO 3166-2:FJ
Finland|FI|FIN|246|ISO 3166-2:FI
France|FR|FRA|250|ISO 3166-2:FR
French Guiana|GF|GUF|254|ISO 3166-2:GF
French Polynesia|PF|PYF|258|ISO 3166-2:PF
French Southern Territories|TF|ATF|260|ISO 3166-2:TF
Gabon|GA|GAB|266|ISO 3166-2:GA
Gambia|GM|GMB|270|ISO 3166-2:GM
Georgia|GE|GEO|268|ISO 3166-2:GE
Germany|DE|DEU|276|ISO 3166-2:DE
Ghana|GH|GHA|288|ISO 3166-2:GH
Gibraltar|GI|GIB|292|ISO 3166-2:GI
Greece|GR|GRC|300|ISO 3166-2:GR
Greenland|GL|GRL|304|ISO 3166-2:GL
Grenada|GD|GRD|308|ISO 3166-2:GD
Guadeloupe|GP|GLP|312|ISO 3166-2:GP
Guam|GU|GUM|316|ISO 3166-2:GU
Guatemala|GT|GTM|320|ISO 3166-2:GT
Guernsey|GG|GGY|831|ISO 3166-2:GG
Guinea|GN|GIN|324|ISO 3166-2:GN
Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW
Guyana|GY|GUY|328|ISO 3166-2:GY
Haiti|HT|HTI|332|ISO 3166-2:HT
Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM
Holy See (Vatican City State)|VA|VAT|336|ISO 3166-2:VA
Honduras|HN|HND|340|ISO 3166-2:HN
Hong Kong|HK|HKG|344|ISO 3166-2:HK
Hungary|HU|HUN|348|ISO 3166-2:HU
Iceland|IS|ISL|352|ISO 3166-2:IS
India|IN|IND|356|ISO 3166-2:IN
Indonesia|ID|IDN|360|ISO 3166-2:ID
Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR
Iraq|IQ|IRQ|368|ISO 3166-2:IQ
Ireland|IE|IRL|372|ISO 3166-2:IE
Isle of Man|IM|IMN|833|ISO 3166-2:IM
Israel|IL|ISR|376|ISO 3166-2:IL
Italy|IT|ITA|380|ISO 3166-2:IT
Jamaica|JM|JAM|388|ISO 3166-2:JM
Japan|JP|JPN|392|ISO 3166-2:JP
Jersey|JE|JEY|832|ISO 3166-2:JE
Jordan|JO|JOR|400|ISO 3166-2:JO
Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ
Kenya|KE|KEN|404|ISO 3166-2:KE
Kiribati|KI|KIR|296|ISO 3166-2:KI
Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP
Korea, Republic of|KR|KOR|410|ISO 3166-2:KR
Kuwait|KW|KWT|414|ISO 3166-2:KW
Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG
Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA
Latvia|LV|LVA|428|ISO 3166-2:LV
Lebanon|LB|LBN|422|ISO 3166-2:LB
Lesotho|LS|LSO|426|ISO 3166-2:LS
Liberia|LR|LBR|430|ISO 3166-2:LR
Libya|LY|LBY|434|ISO 3166-2:LY
Liechtenstein|LI|LIE|438|ISO 3166-2:LI
Lithuania|LT|LTU|440|ISO 3166-2:LT
Luxembourg|LU|LUX|442|ISO 3166-2:LU
Macao|MO|MAC|446|ISO 3166-2:MO
Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK
Madagascar|MG|MDG|450|ISO 3166-2:MG
Malawi|MW|MWI|454|ISO 3166-2:MW
Malaysia|MY|MYS|458|ISO 3166-2:MY
Maldives|MV|MDV|462|ISO 3166-2:MV
Mali|ML|MLI|466|ISO 3166-2:ML
Malta|MT|MLT|470|ISO 3166-2:MT
Marshall Islands|MH|MHL|584|ISO 3166-2:MH
Martinique|MQ|MTQ|474|ISO 3166-2:MQ
Mauritania|MR|MRT|478|ISO 3166-2:MR
Mauritius|MU|MUS|480|ISO 3166-2:MU
Mayotte|YT|MYT|175|ISO 3166-2:YT
Mexico|MX|MEX|484|ISO 3166-2:MX
Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM
Moldova, Republic of|MD|MDA|498|ISO 3166-2:MD
Monaco|MC|MCO|492|ISO 3166-2:MC
Mongolia|MN|MNG|496|ISO 3166-2:MN
Montenegro|ME|MNE|499|ISO 3166-2:ME
Montserrat|MS|MSR|500|ISO 3166-2:MS
Morocco|MA|MAR|504|ISO 3166-2:MA
Mozambique|MZ|MOZ|508|ISO 3166-2:MZ
Myanmar|MM|MMR|104|ISO 3166-2:MM
Namibia|NA|NAM|516|ISO 3166-2:NA
Nauru|NR|NRU|520|ISO 3166-2:NR
Nepal|NP|NPL|524|ISO 3166-2:NP
Netherlands|NL|NLD|528|ISO 3166-2:NL
New Caledonia|NC|NCL|540|ISO 3166-2:NC
New Zealand|NZ|NZL|554|ISO 3166-2:NZ
Nicaragua|NI|NIC|558|ISO 3166-2:NI
Niger|NE|NER|562|ISO 3166-2:NE
Nigeria|NG|NGA|566|ISO 3166-2:NG
Niue|NU|NIU|570|ISO 3166-2:NU
Norfolk Island|NF|NFK|574|ISO 3166-2:NF
Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP
Norway|NO|NOR|578|ISO 3166-2:NO
Oman|OM|OMN|512|ISO 3166-2:OM
Pakistan|PK|PAK|586|ISO 3166-2:PK
Palau|PW|PLW|585|ISO 3166-2:PW
Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS
Panama|PA|PAN|591|ISO 3166-2:PA
Papua New Guinea|PG|PNG|598|ISO 3166-2:PG
Paraguay|PY|PRY|600|ISO 3166-2:PY
Peru|PE|PER|604|ISO 3166-2:PE
Philippines|PH|PHL|608|ISO 3166-2:PH
Pitcairn|PN|PCN|612|ISO 3166-2:PN
Poland|PL|POL|616|ISO 3166-2:PL
Portugal|PT|PRT|620|ISO 3166-2:PT
Puerto Rico|PR|PRI|630|ISO 3166-2:PR
Qatar|QA|QAT|634|ISO 3166-2:QA
Réunion|RE|REU|638|ISO 3166-2:RE
Romania|RO|ROU|642|ISO 3166-2:RO
Russian Federation|RU|RUS|643|ISO 3166-2:RU
Rwanda|RW|RWA|646|ISO 3166-2:RW
Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL
Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH
Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN
Saint Lucia|LC|LCA|662|ISO 3166-2:LC
Saint Martin (French part)|MF|MAF|663|ISO 3166-2:MF
Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM
Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC
Samoa|WS|WSM|882|ISO 3166-2:WS
San Marino|SM|SMR|674|ISO 3166-2:SM
Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST
Saudi Arabia|SA|SAU|682|ISO 3166-2:SA
Senegal|SN|SEN|686|ISO 3166-2:SN
Serbia|RS|SRB|688|ISO 3166-2:RS
Seychelles|SC|SYC|690|ISO 3166-2:SC
Sierra Leone|SL|SLE|694|ISO 3166-2:SL
Singapore|SG|SGP|702|ISO 3166-2:SG
Sint Maarten (Dutch part)|SX|SXM|534|ISO 3166-2:SX
Slovakia|SK|SVK|703|ISO 3166-2:SK
Slovenia|SI|SVN|705|ISO 3166-2:SI
Solomon Islands|SB|SLB|090|ISO 3166-2:SB
Somalia|SO|SOM|706|ISO 3166-2:SO
South Africa|ZA|ZAF|710|ISO 3166-2:ZA
South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS
South Sudan|SS|SSD|728|ISO 3166-2:SS
Spain|ES|ESP|724|ISO 3166-2:ES
Sri Lanka|LK|LKA|144|ISO 3166-2:LK
Sudan|SD|SDN|729|ISO 3166-2:SD
Suriname|SR|SUR|740|ISO 3166-2:SR
Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ
Swaziland|SZ|SWZ|748|ISO 3166-2:SZ
Sweden|SE|SWE|752|ISO 3166-2:SE
Switzerland|CH|CHE|756|ISO 3166-2:CH
Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY
Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW
Tajikistan|TJ|TJK|762|ISO 3166-2:TJ
Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ
Thailand|TH|THA|764|ISO 3166-2:TH
Timor-Leste|TL|TLS|626|ISO 3166-2:TL
Togo|TG|TGO|768|ISO 3166-2:TG
Tokelau|TK|TKL|772|ISO 3166-2:TK
Tonga|TO|TON|776|ISO 3166-2:TO
Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT
Tunisia|TN|TUN|788|ISO 3166-2:TN
Turkey|TR|TUR|792|ISO 3166-2:TR
Turkmenistan|TM|TKM|795|ISO 3166-2:TM
Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC
Tuvalu|TV|TUV|798|ISO 3166-2:TV
Uganda|UG|UGA|800|ISO 3166-2:UG
Ukraine|UA|UKR|804|ISO 3166-2:UA
United Arab Emirates|AE|ARE|784|ISO 3166-2:AE
United Kingdom|GB|GBR|826|ISO 3166-2:GB
United States|US|USA|840|ISO 3166-2:US
United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM
Uruguay|UY|URY|858|ISO 3166-2:UY
Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ
Vanuatu|VU|VUT|548|ISO 3166-2:VU
Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE
Viet Nam|VN|VNM|704|ISO 3166-2:VN
Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG
Virgin Islands, U.S.|VI|VIR|850|ISO 3166-2:VI
Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF
Western Sahara|EH|ESH|732|ISO 3166-2:EH
Yemen|YE|YEM|887|ISO 3166-2:YE
Zambia|ZM|ZMB|894|ISO 3166-2:ZM
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW

4
libs/guessit/__init__.py

@ -18,7 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__version__ = '0.3.1'
__version__ = '0.4'
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info']
@ -29,7 +29,7 @@ from guessit.language import Language
from guessit.matcher import IterativeMatcher
import logging
log = logging.getLogger("guessit")
log = logging.getLogger(__name__)
class NullHandler(logging.Handler):

113
libs/guessit/country.py

@ -0,0 +1,113 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import fileutils
import logging
log = logging.getLogger(__name__)
# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
#
# Description of the fields:
# "An English name, an alpha-2 code (when given),
# an alpha-3 code (when given), a numeric code, and an ISO 3166-2 code
# are all separated by pipe (|) characters."
# Raw contents of the ISO 3166-1 table shipped next to this module, decoded
# to unicode so that non-ASCII country names compare correctly.
_iso3166_contents = fileutils.load_file_in_same_dir(__file__,
                                                    'ISO-3166-1_utf8.txt').decode('utf-8')

# One row per country:
#   [ english name, alpha-2 code, alpha-3 code, numeric code, ISO 3166-2 code ]
# Blank lines are skipped: they would otherwise yield a one-element row and
# make the c[1] / c[2] accesses below fail with an IndexError.
country_matrix = [ l.strip().split('|')
                   for l in _iso3166_contents.strip().split('\n')
                   if l.strip() ]

# Pseudo-countries that are not part of ISO 3166-1 but are needed by guessit.
country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
                    [ 'Latin America', '', 'lat', '', '' ]
                    ]

# Maps every known lower-cased spelling of a country (English name, alpha-2
# code, alpha-3 code) to its lower-cased alpha-3 code.
# Empty fields are never used as keys: the 'Latin America' row has no alpha-2
# code, and indexing it would make the empty string resolve to 'lat'.
country_to_alpha3 = dict((c[0].lower(), c[2].lower())
                         for c in country_matrix if c[0])
country_to_alpha3.update(dict((c[1].lower(), c[2].lower())
                              for c in country_matrix if c[1]))
country_to_alpha3.update(dict((c[2].lower(), c[2].lower())
                              for c in country_matrix if c[2]))

# add here exceptions / non ISO representations
# Note: remember to put those exceptions in lower-case, they won't work otherwise
country_to_alpha3.update({ 'latinoamérica': 'lat',
                           'brazilian': 'bra',
                           'españa': 'esp',
                           'uk': 'gbr'
                           })

# Reverse lookups, keyed by lower-cased alpha-3 code. The alpha-2 value is the
# empty string for rows that have no alpha-2 code.
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
class Country(object):
    """This class represents a country.

    You can initialize it with pretty much anything, as it knows conversion
    from ISO-3166 2-letter and 3-letter codes, and an English name.

    The lookup ignores case and surrounding whitespace. A string that cannot
    be identified yields the 'unk' (Unknown) country, unless ``strict`` is
    True, in which case a ValueError is raised instead.
    """

    def __init__(self, country, strict=False):
        # Byte strings are assumed to be UTF-8; undecodable bytes are replaced
        # so that they end up as an unidentified country rather than as a
        # decoding error.
        if isinstance(country, bytes):
            country = country.decode('utf-8', 'replace')

        self.alpha3 = country_to_alpha3.get(country.strip().lower())

        if self.alpha3 is None and strict:
            msg = 'The given string "%s" could not be identified as a country'
            raise ValueError(msg % country)

        if self.alpha3 is None:
            self.alpha3 = 'unk'

    @property
    def alpha2(self):
        """The lower-case ISO 3166-1 alpha-2 code of this country."""
        return country_alpha3_to_alpha2[self.alpha3]

    @property
    def english_name(self):
        """The English name of this country."""
        return country_alpha3_to_en_name[self.alpha3]

    def __hash__(self):
        return hash(self.alpha3)

    def __eq__(self, other):
        """Compare with another Country, or with a string naming a country."""
        if isinstance(other, Country):
            return self.alpha3 == other.alpha3

        if isinstance(other, basestring):
            try:
                # Parse strictly: a string that does not name a country must
                # compare unequal, instead of silently becoming 'unk' and
                # matching every other unidentified country.
                return self == Country(other, strict=True)
            except ValueError:
                return False

        return False

    def __ne__(self, other):
        return not self == other

    def __unicode__(self):
        return self.english_name

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        return 'Country(%s)' % self.english_name

2
libs/guessit/guess.py

@ -22,7 +22,7 @@ import json
import datetime
import logging
log = logging.getLogger("guessit.guess")
log = logging.getLogger(__name__)
class Guess(dict):

158
libs/guessit/language.py

@ -18,10 +18,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import fileutils
from guessit.country import Country
import re
import logging
log = logging.getLogger('guessit.language')
__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'search_language' ]
log = logging.getLogger(__name__)
# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
#
@ -30,9 +38,23 @@ log = logging.getLogger('guessit.language')
# an alpha-2 code (when given), an English name, and a French name of a language
# are all separated by pipe (|) characters."
_iso639_contents = fileutils.load_file_in_same_dir(__file__,
'ISO-639-2_utf-8.txt')
language_matrix = [ l.strip().decode('utf-8').split('|')
for l in _iso639_contents.split('\n') ]
'ISO-639-2_utf-8.txt').decode('utf-8')
# drop the BOM from the beginning of the file
_iso639_contents = _iso639_contents[1:]
language_matrix = [ l.strip().split('|')
for l in _iso639_contents.strip().split('\n') ]
language_matrix += [ [ 'unk', '', 'un', 'Unknown', 'inconnu' ] ]
# remove unused languages that shadow other common ones with a non-official form
# Filter in one pass rather than calling remove() while iterating over the
# same list, which makes the loop skip the row following each removed one.
# The slice assignment keeps the list object itself unchanged.
language_matrix[:] = [ lang for lang in language_matrix
                       if lang[2] not in ('se',    # Northern Sami shadows Swedish
                                          'br') ]  # Breton shadows Brazilian
lng3 = frozenset(l[0] for l in language_matrix if l[0])
lng3term = frozenset(l[1] for l in language_matrix if l[1])
@ -63,54 +85,126 @@ lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
for l in language_matrix if l[4]
for fr_name in l[4].split('; '))
# contains a list of exceptions: strings that should be parsed as a language
# but which are not in an ISO form
lng_exceptions = { 'gr': ('gre', None),
'greek': ('gre', None),
'esp': ('spa', None),
'español': ('spa', None),
'se': ('swe', None),
'po': ('pt', 'br'),
'pob': ('pt', 'br'),
'br': ('pt', 'br'),
'brazilian': ('pt', 'br'),
'català': ('cat', None),
'cz': ('cze', None),
'ua': ('ukr', None),
'cn': ('chi', None),
'chs': ('chi', None),
'jp': ('jpn', None)
}
def is_iso_language(language):
return language.lower() in lng_all_names
def is_language(language):
return language.lower() in lng_all_names
return is_iso_language(language) or language in lng_exceptions
def lang_set(languages, strict=False):
"""Return a set of guessit.Language created from their given string
representation.
if strict is True, then this will raise an exception if any language
could not be identified.
"""
return set(Language(l, strict=strict) for l in languages)
class Language(object):
"""This class represents a human language.
You can initialize it with pretty much everything, as it knows conversion
You can initialize it with pretty much anything, as it knows conversion
from ISO-639 2-letter and 3-letter codes, English and French names.
You can also distinguish languages for specific countries, such as
Portuguese and Brazilian Portuguese.
>>> Language('fr')
Language(French)
>>> Language('eng').french_name()
>>> Language('eng').french_name
u'anglais'
>>> Language('pt(br)').country.english_name
u'Brazil'
>>> Language('Español (Latinoamérica)').country.english_name
u'Latin America'
>>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
True
>>> Language('zz', strict=False).english_name
u'Unknown'
"""
def __init__(self, language):
lang = None
language = language.lower()
_with_country_regexp = re.compile('(.*)\((.*)\)')
def __init__(self, language, country=None, strict=False):
language = language.strip().lower()
if isinstance(language, str):
language = language.decode('utf-8')
with_country = Language._with_country_regexp.match(language)
if with_country:
self.lang = Language(with_country.group(1)).lang
self.country = Country(with_country.group(2))
return
self.lang = None
self.country = Country(country) if country else None
if len(language) == 2:
lang = lng2_to_lng3.get(language)
self.lang = lng2_to_lng3.get(language)
elif len(language) == 3:
lang = (language
if language in lng3
else lng3term_to_lng3.get(language))
self.lang = (language
if language in lng3
else lng3term_to_lng3.get(language))
else:
lang = (lng_en_name_to_lng3.get(language) or
lng_fr_name_to_lng3.get(language))
self.lang = (lng_en_name_to_lng3.get(language) or
lng_fr_name_to_lng3.get(language))
if lang is None:
msg = 'The given string "%s" could not be identified as a language'
raise ValueError(msg % language)
if self.lang is None and language in lng_exceptions:
lang, country = lng_exceptions[language]
self.lang = Language(lang).alpha3
self.country = Country(country) if country else None
self.lang = lang
msg = 'The given string "%s" could not be identified as a language' % language
def lng2(self):
if self.lang is None and strict:
raise ValueError(msg)
if self.lang is None:
log.debug(msg)
self.lang = 'unk'
@property
def alpha2(self):
return lng3_to_lng2[self.lang]
def lng3(self):
@property
def alpha3(self):
return self.lang
def lng3term(self):
@property
def alpha3term(self):
return lng3_to_lng3term[self.lang]
@property
def english_name(self):
return lng3_to_lng_en_name[self.lang]
@property
def french_name(self):
return lng3_to_lng_fr_name[self.lang]
@ -132,15 +226,27 @@ class Language(object):
def __ne__(self, other):
return not self == other
def __nonzero__(self):
return self.lang != 'unk'
def __unicode__(self):
return lng3_to_lng_en_name[self.lang]
if self.country:
return '%s(%s)' % (self.english_name, self.country.alpha2)
else:
return self.english_name
def __str__(self):
return unicode(self).encode('utf-8')
def __repr__(self):
return 'Language(%s)' % self
if self.country:
return 'Language(%s, country=%s)' % (self.english_name, self.country)
else:
return 'Language(%s)' % self.english_name
ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([Language('unk')])
ALL_LANGUAGES_NAMES = lng_all_names
def search_language(string, lang_filter=None):
"""Looks for language patterns, and if found return the language object,
@ -177,7 +283,7 @@ def search_language(string, lang_filter=None):
sep = r'[](){} \._-+'
if lang_filter:
lang_filter = set(Language(l) for l in lang_filter)
lang_filter = lang_set(lang_filter)
slow = ' %s ' % string.lower()
confidence = 1.0 # for all of them

5
libs/guessit/matcher.py

@ -25,7 +25,7 @@ from guessit.guess import (merge_similar_guesses, merge_all,
import copy
import logging
log = logging.getLogger("guessit.matcher")
log = logging.getLogger(__name__)
class IterativeMatcher(object):
@ -105,7 +105,7 @@ class IterativeMatcher(object):
'guess_release_group', 'guess_properties',
'guess_weak_episodes_rexps', 'guess_language']
else:
strategy = ['guess_date', 'guess_year', 'guess_video_rexps',
strategy = ['guess_date', 'guess_video_rexps',
'guess_website', 'guess_release_group',
'guess_properties', 'guess_language']
@ -125,6 +125,7 @@ class IterativeMatcher(object):
if mtree.guess['type'] in ('episode', 'episodesubtitle'):
apply_transfo('guess_episode_info_from_position')
else:
apply_transfo('guess_year')
apply_transfo('guess_movie_title_from_position')
# 6- perform some post-processing steps

2
libs/guessit/matchtree.py

@ -23,7 +23,7 @@ from guessit.textutils import clean_string, str_fill, to_utf8
from guessit.patterns import group_delimiters
import logging
log = logging.getLogger("guessit.matchtree")
log = logging.getLogger(__name__)
class BaseMatchTree(object):

7
libs/guessit/patterns.py

@ -22,8 +22,9 @@
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa', 'txt' ]
video_exts = [ 'avi', 'mkv', 'mpg', 'mp4', 'm4v', 'mov', 'ogg', 'ogm', 'ogv',
'wmv', 'divx' ]
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
group_delimiters = [ '()', '[]', '{}' ]
@ -62,6 +63,8 @@ weak_episode_rexps = [ # ... 213 or 0106 ...
# ... 2x13 ...
(sep + r'[^0-9](?P<season>[0-9]{1,2})\.(?P<episodeNumber>[0-9]{2})[^0-9]' + sep, (1, -1)),
# ... e13 ... for a mini-series without a season number
(r'e(?P<episodeNumber>[0-9]{1,4})[^0-9]', (0, -1)),
]
non_episode_title = [ 'extras', 'rip' ]

2
libs/guessit/transfo/__init__.py

@ -23,7 +23,7 @@ from guessit.patterns import canonical_form
from guessit.textutils import clean_string
import logging
log = logging.getLogger('guessit.transfo')
log = logging.getLogger(__name__)
def found_property(node, name, confidence):

2
libs/guessit/transfo/guess_bonus_features.py

@ -21,7 +21,7 @@
from guessit.transfo import found_property
import logging
log = logging.getLogger("guessit.transfo.guess_bonus_features")
log = logging.getLogger(__name__)
def process(mtree):

2
libs/guessit/transfo/guess_date.py

@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.date import search_date
import logging
log = logging.getLogger("guessit.transfo.guess_date")
log = logging.getLogger(__name__)
def guess_date(string):

5
libs/guessit/transfo/guess_episode_info_from_position.py

@ -22,7 +22,7 @@ from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging
log = logging.getLogger("guessit.transfo.guess_episode_info_from_position")
log = logging.getLogger(__name__)
def match_from_epnum_position(mtree, node):
@ -112,6 +112,9 @@ def process(mtree):
if len(title_candidates) >= 2:
found_property(title_candidates[0], 'series', 0.4)
found_property(title_candidates[1], 'title', 0.4)
elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', 0.4)
# if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name

2
libs/guessit/transfo/guess_episodes_rexps.py

@ -24,7 +24,7 @@ from guessit.patterns import episode_rexps
import re
import logging
log = logging.getLogger("guessit.transfo.guess_episodes_rexps")
log = logging.getLogger(__name__)
def guess_episodes_rexps(string):

2
libs/guessit/transfo/guess_filetype.py

@ -26,7 +26,7 @@ import re
import mimetypes
import logging
log = logging.getLogger("guessit.transfo.guess_filetype")
log = logging.getLogger(__name__)
def guess_filetype(filename, filetype):

2
libs/guessit/transfo/guess_language.py

@ -24,7 +24,7 @@ from guessit.language import search_language
from guessit.textutils import clean_string
import logging
log = logging.getLogger("guessit.transfo.guess_language")
log = logging.getLogger(__name__)
def guess_language(string):

2
libs/guessit/transfo/guess_movie_title_from_position.py

@ -21,7 +21,7 @@
from guessit import Guess
import logging
log = logging.getLogger("guessit.transfo.guess_movie_title_from_position")
log = logging.getLogger(__name__)
def process(mtree):

2
libs/guessit/transfo/guess_properties.py

@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import logging
log = logging.getLogger("guessit.transfo.guess_properties")
log = logging.getLogger(__name__)
def guess_properties(string):

2
libs/guessit/transfo/guess_release_group.py

@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
import re
import logging
log = logging.getLogger("guessit.transfo.guess_release_group")
log = logging.getLogger(__name__)
def guess_release_group(string):

2
libs/guessit/transfo/guess_video_rexps.py

@ -24,7 +24,7 @@ from guessit.patterns import video_rexps, sep
import re
import logging
log = logging.getLogger("guessit.transfo.guess_video_rexps")
log = logging.getLogger(__name__)
def guess_video_rexps(string):

2
libs/guessit/transfo/guess_weak_episodes_rexps.py

@ -24,7 +24,7 @@ from guessit.patterns import weak_episode_rexps
import re
import logging
log = logging.getLogger("guessit.transfo.guess_weak_episodes_rexps")
log = logging.getLogger(__name__)
def guess_weak_episodes_rexps(string, node):

2
libs/guessit/transfo/guess_website.py

@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.patterns import websites
import logging
log = logging.getLogger("guessit.transfo.guess_website")
log = logging.getLogger(__name__)
def guess_website(string):

2
libs/guessit/transfo/guess_year.py

@ -22,7 +22,7 @@ from guessit.transfo import SingleNodeGuesser
from guessit.date import search_year
import logging
log = logging.getLogger("guessit.transfo.guess_year")
log = logging.getLogger(__name__)
def guess_year(string):

2
libs/guessit/transfo/post_process.py

@ -21,7 +21,7 @@
from guessit.patterns import subtitle_exts
import logging
log = logging.getLogger("guessit.transfo.post_process")
log = logging.getLogger(__name__)
def process(mtree):

2
libs/guessit/transfo/split_explicit_groups.py

@ -22,7 +22,7 @@ from guessit.textutils import find_first_level_groups
from guessit.patterns import group_delimiters
import logging
log = logging.getLogger("guessit.transfo.split_explicit_groups")
log = logging.getLogger(__name__)
def process(mtree):

2
libs/guessit/transfo/split_on_dash.py

@ -22,7 +22,7 @@ from guessit.patterns import sep
import re
import logging
log = logging.getLogger("guessit.transfo.split_on_dash")
log = logging.getLogger(__name__)
def process(mtree):

2
libs/guessit/transfo/split_path_components.py

@ -22,7 +22,7 @@ from guessit import fileutils
import os.path
import logging
log = logging.getLogger("guessit.transfo.split_path_components")
log = logging.getLogger(__name__)
def process(mtree):

Loading…
Cancel
Save