1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-16 14:25:02 -05:00
SickRage/lib/subliminal/language.py
echel0n c945726f05 Fixed subliminal issues.
Added ignore/required words option to be set individually for each show.
Fixed issue with global ignore words not properly matching against releases.
Fixed issue with
2014-04-23 22:18:16 -07:00

1049 lines
54 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
#
# This file is part of subliminal.
#
# subliminal is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# subliminal is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with subliminal. If not, see <http://www.gnu.org/licenses/>.
from .utils import to_unicode
import re
import logging
logger = logging.getLogger("subliminal")
# Default country table used by Country when no explicit ``countries`` list
# is supplied.
# Each entry is a 4-tuple read by Country.__init__ as:
#   (alpha2 code, alpha3 code, numeric code, name)
# The numeric code is stored as a string, so leading zeros are kept.
COUNTRIES = [('AF', 'AFG', '004', u'Afghanistan'),
('AX', 'ALA', '248', u'Åland Islands'),
('AL', 'ALB', '008', u'Albania'),
('DZ', 'DZA', '012', u'Algeria'),
('AS', 'ASM', '016', u'American Samoa'),
('AD', 'AND', '020', u'Andorra'),
('AO', 'AGO', '024', u'Angola'),
('AI', 'AIA', '660', u'Anguilla'),
('AQ', 'ATA', '010', u'Antarctica'),
('AG', 'ATG', '028', u'Antigua and Barbuda'),
('AR', 'ARG', '032', u'Argentina'),
('AM', 'ARM', '051', u'Armenia'),
('AW', 'ABW', '533', u'Aruba'),
('AU', 'AUS', '036', u'Australia'),
('AT', 'AUT', '040', u'Austria'),
('AZ', 'AZE', '031', u'Azerbaijan'),
('BS', 'BHS', '044', u'Bahamas'),
('BH', 'BHR', '048', u'Bahrain'),
('BD', 'BGD', '050', u'Bangladesh'),
('BB', 'BRB', '052', u'Barbados'),
('BY', 'BLR', '112', u'Belarus'),
('BE', 'BEL', '056', u'Belgium'),
('BZ', 'BLZ', '084', u'Belize'),
('BJ', 'BEN', '204', u'Benin'),
('BM', 'BMU', '060', u'Bermuda'),
('BT', 'BTN', '064', u'Bhutan'),
('BO', 'BOL', '068', u'Bolivia, Plurinational State of'),
('BQ', 'BES', '535', u'Bonaire, Sint Eustatius and Saba'),
('BA', 'BIH', '070', u'Bosnia and Herzegovina'),
('BW', 'BWA', '072', u'Botswana'),
('BV', 'BVT', '074', u'Bouvet Island'),
('BR', 'BRA', '076', u'Brazil'),
('IO', 'IOT', '086', u'British Indian Ocean Territory'),
('BN', 'BRN', '096', u'Brunei Darussalam'),
('BG', 'BGR', '100', u'Bulgaria'),
('BF', 'BFA', '854', u'Burkina Faso'),
('BI', 'BDI', '108', u'Burundi'),
('KH', 'KHM', '116', u'Cambodia'),
('CM', 'CMR', '120', u'Cameroon'),
('CA', 'CAN', '124', u'Canada'),
('CV', 'CPV', '132', u'Cape Verde'),
('KY', 'CYM', '136', u'Cayman Islands'),
('CF', 'CAF', '140', u'Central African Republic'),
('TD', 'TCD', '148', u'Chad'),
('CL', 'CHL', '152', u'Chile'),
('CN', 'CHN', '156', u'China'),
('CX', 'CXR', '162', u'Christmas Island'),
('CC', 'CCK', '166', u'Cocos (Keeling) Islands'),
('CO', 'COL', '170', u'Colombia'),
('KM', 'COM', '174', u'Comoros'),
('CG', 'COG', '178', u'Congo'),
('CD', 'COD', '180', u'Congo, The Democratic Republic of the'),
('CK', 'COK', '184', u'Cook Islands'),
('CR', 'CRI', '188', u'Costa Rica'),
('CI', 'CIV', '384', u'Côte d\'Ivoire'),
('HR', 'HRV', '191', u'Croatia'),
('CU', 'CUB', '192', u'Cuba'),
('CW', 'CUW', '531', u'Curaçao'),
('CY', 'CYP', '196', u'Cyprus'),
('CZ', 'CZE', '203', u'Czech Republic'),
('DK', 'DNK', '208', u'Denmark'),
('DJ', 'DJI', '262', u'Djibouti'),
('DM', 'DMA', '212', u'Dominica'),
('DO', 'DOM', '214', u'Dominican Republic'),
('EC', 'ECU', '218', u'Ecuador'),
('EG', 'EGY', '818', u'Egypt'),
('SV', 'SLV', '222', u'El Salvador'),
('GQ', 'GNQ', '226', u'Equatorial Guinea'),
('ER', 'ERI', '232', u'Eritrea'),
('EE', 'EST', '233', u'Estonia'),
('ET', 'ETH', '231', u'Ethiopia'),
('FK', 'FLK', '238', u'Falkland Islands (Malvinas)'),
('FO', 'FRO', '234', u'Faroe Islands'),
('FJ', 'FJI', '242', u'Fiji'),
('FI', 'FIN', '246', u'Finland'),
('FR', 'FRA', '250', u'France'),
('GF', 'GUF', '254', u'French Guiana'),
('PF', 'PYF', '258', u'French Polynesia'),
('TF', 'ATF', '260', u'French Southern Territories'),
('GA', 'GAB', '266', u'Gabon'),
('GM', 'GMB', '270', u'Gambia'),
('GE', 'GEO', '268', u'Georgia'),
('DE', 'DEU', '276', u'Germany'),
('GH', 'GHA', '288', u'Ghana'),
('GI', 'GIB', '292', u'Gibraltar'),
('GR', 'GRC', '300', u'Greece'),
('GL', 'GRL', '304', u'Greenland'),
('GD', 'GRD', '308', u'Grenada'),
('GP', 'GLP', '312', u'Guadeloupe'),
('GU', 'GUM', '316', u'Guam'),
('GT', 'GTM', '320', u'Guatemala'),
('GG', 'GGY', '831', u'Guernsey'),
('GN', 'GIN', '324', u'Guinea'),
('GW', 'GNB', '624', u'Guinea-Bissau'),
('GY', 'GUY', '328', u'Guyana'),
('HT', 'HTI', '332', u'Haiti'),
('HM', 'HMD', '334', u'Heard Island and McDonald Islands'),
('VA', 'VAT', '336', u'Holy See (Vatican City State)'),
('HN', 'HND', '340', u'Honduras'),
('HK', 'HKG', '344', u'Hong Kong'),
('HU', 'HUN', '348', u'Hungary'),
('IS', 'ISL', '352', u'Iceland'),
('IN', 'IND', '356', u'India'),
('ID', 'IDN', '360', u'Indonesia'),
('IR', 'IRN', '364', u'Iran, Islamic Republic of'),
('IQ', 'IRQ', '368', u'Iraq'),
('IE', 'IRL', '372', u'Ireland'),
('IM', 'IMN', '833', u'Isle of Man'),
('IL', 'ISR', '376', u'Israel'),
('IT', 'ITA', '380', u'Italy'),
('JM', 'JAM', '388', u'Jamaica'),
('JP', 'JPN', '392', u'Japan'),
('JE', 'JEY', '832', u'Jersey'),
('JO', 'JOR', '400', u'Jordan'),
('KZ', 'KAZ', '398', u'Kazakhstan'),
('KE', 'KEN', '404', u'Kenya'),
('KI', 'KIR', '296', u'Kiribati'),
('KP', 'PRK', '408', u'Korea, Democratic People\'s Republic of'),
('KR', 'KOR', '410', u'Korea, Republic of'),
('KW', 'KWT', '414', u'Kuwait'),
('KG', 'KGZ', '417', u'Kyrgyzstan'),
('LA', 'LAO', '418', u'Lao People\'s Democratic Republic'),
('LV', 'LVA', '428', u'Latvia'),
('LB', 'LBN', '422', u'Lebanon'),
('LS', 'LSO', '426', u'Lesotho'),
('LR', 'LBR', '430', u'Liberia'),
('LY', 'LBY', '434', u'Libya'),
('LI', 'LIE', '438', u'Liechtenstein'),
('LT', 'LTU', '440', u'Lithuania'),
('LU', 'LUX', '442', u'Luxembourg'),
('MO', 'MAC', '446', u'Macao'),
('MK', 'MKD', '807', u'Macedonia, Republic of'),
('MG', 'MDG', '450', u'Madagascar'),
('MW', 'MWI', '454', u'Malawi'),
('MY', 'MYS', '458', u'Malaysia'),
('MV', 'MDV', '462', u'Maldives'),
('ML', 'MLI', '466', u'Mali'),
('MT', 'MLT', '470', u'Malta'),
('MH', 'MHL', '584', u'Marshall Islands'),
('MQ', 'MTQ', '474', u'Martinique'),
('MR', 'MRT', '478', u'Mauritania'),
('MU', 'MUS', '480', u'Mauritius'),
('YT', 'MYT', '175', u'Mayotte'),
('MX', 'MEX', '484', u'Mexico'),
('FM', 'FSM', '583', u'Micronesia, Federated States of'),
('MD', 'MDA', '498', u'Moldova, Republic of'),
('MC', 'MCO', '492', u'Monaco'),
('MN', 'MNG', '496', u'Mongolia'),
('ME', 'MNE', '499', u'Montenegro'),
('MS', 'MSR', '500', u'Montserrat'),
('MA', 'MAR', '504', u'Morocco'),
('MZ', 'MOZ', '508', u'Mozambique'),
('MM', 'MMR', '104', u'Myanmar'),
('NA', 'NAM', '516', u'Namibia'),
('NR', 'NRU', '520', u'Nauru'),
('NP', 'NPL', '524', u'Nepal'),
('NL', 'NLD', '528', u'Netherlands'),
('NC', 'NCL', '540', u'New Caledonia'),
('NZ', 'NZL', '554', u'New Zealand'),
('NI', 'NIC', '558', u'Nicaragua'),
('NE', 'NER', '562', u'Niger'),
('NG', 'NGA', '566', u'Nigeria'),
('NU', 'NIU', '570', u'Niue'),
('NF', 'NFK', '574', u'Norfolk Island'),
('MP', 'MNP', '580', u'Northern Mariana Islands'),
('NO', 'NOR', '578', u'Norway'),
('OM', 'OMN', '512', u'Oman'),
('PK', 'PAK', '586', u'Pakistan'),
('PW', 'PLW', '585', u'Palau'),
('PS', 'PSE', '275', u'Palestinian Territory, Occupied'),
('PA', 'PAN', '591', u'Panama'),
('PG', 'PNG', '598', u'Papua New Guinea'),
('PY', 'PRY', '600', u'Paraguay'),
('PE', 'PER', '604', u'Peru'),
('PH', 'PHL', '608', u'Philippines'),
('PN', 'PCN', '612', u'Pitcairn'),
('PL', 'POL', '616', u'Poland'),
('PT', 'PRT', '620', u'Portugal'),
('PR', 'PRI', '630', u'Puerto Rico'),
('QA', 'QAT', '634', u'Qatar'),
('RE', 'REU', '638', u'Réunion'),
('RO', 'ROU', '642', u'Romania'),
('RU', 'RUS', '643', u'Russian Federation'),
('RW', 'RWA', '646', u'Rwanda'),
('BL', 'BLM', '652', u'Saint Barthélemy'),
('SH', 'SHN', '654', u'Saint Helena, Ascension and Tristan da Cunha'),
('KN', 'KNA', '659', u'Saint Kitts and Nevis'),
('LC', 'LCA', '662', u'Saint Lucia'),
('MF', 'MAF', '663', u'Saint Martin (French part)'),
('PM', 'SPM', '666', u'Saint Pierre and Miquelon'),
('VC', 'VCT', '670', u'Saint Vincent and the Grenadines'),
('WS', 'WSM', '882', u'Samoa'),
('SM', 'SMR', '674', u'San Marino'),
('ST', 'STP', '678', u'Sao Tome and Principe'),
('SA', 'SAU', '682', u'Saudi Arabia'),
('SN', 'SEN', '686', u'Senegal'),
('RS', 'SRB', '688', u'Serbia'),
('SC', 'SYC', '690', u'Seychelles'),
('SL', 'SLE', '694', u'Sierra Leone'),
('SG', 'SGP', '702', u'Singapore'),
('SX', 'SXM', '534', u'Sint Maarten (Dutch part)'),
('SK', 'SVK', '703', u'Slovakia'),
('SI', 'SVN', '705', u'Slovenia'),
('SB', 'SLB', '090', u'Solomon Islands'),
('SO', 'SOM', '706', u'Somalia'),
('ZA', 'ZAF', '710', u'South Africa'),
('GS', 'SGS', '239', u'South Georgia and the South Sandwich Islands'),
('ES', 'ESP', '724', u'Spain'),
('LK', 'LKA', '144', u'Sri Lanka'),
('SD', 'SDN', '729', u'Sudan'),
('SR', 'SUR', '740', u'Suriname'),
('SS', 'SSD', '728', u'South Sudan'),
('SJ', 'SJM', '744', u'Svalbard and Jan Mayen'),
('SZ', 'SWZ', '748', u'Swaziland'),
('SE', 'SWE', '752', u'Sweden'),
('CH', 'CHE', '756', u'Switzerland'),
('SY', 'SYR', '760', u'Syrian Arab Republic'),
('TW', 'TWN', '158', u'Taiwan, Province of China'),
('TJ', 'TJK', '762', u'Tajikistan'),
('TZ', 'TZA', '834', u'Tanzania, United Republic of'),
('TH', 'THA', '764', u'Thailand'),
('TL', 'TLS', '626', u'Timor-Leste'),
('TG', 'TGO', '768', u'Togo'),
('TK', 'TKL', '772', u'Tokelau'),
('TO', 'TON', '776', u'Tonga'),
('TT', 'TTO', '780', u'Trinidad and Tobago'),
('TN', 'TUN', '788', u'Tunisia'),
('TR', 'TUR', '792', u'Turkey'),
('TM', 'TKM', '795', u'Turkmenistan'),
('TC', 'TCA', '796', u'Turks and Caicos Islands'),
('TV', 'TUV', '798', u'Tuvalu'),
('UG', 'UGA', '800', u'Uganda'),
('UA', 'UKR', '804', u'Ukraine'),
('AE', 'ARE', '784', u'United Arab Emirates'),
('GB', 'GBR', '826', u'United Kingdom'),
('US', 'USA', '840', u'United States'),
('UM', 'UMI', '581', u'United States Minor Outlying Islands'),
('UY', 'URY', '858', u'Uruguay'),
('UZ', 'UZB', '860', u'Uzbekistan'),
('VU', 'VUT', '548', u'Vanuatu'),
('VE', 'VEN', '862', u'Venezuela, Bolivarian Republic of'),
('VN', 'VNM', '704', u'Viet Nam'),
('VG', 'VGB', '092', u'Virgin Islands, British'),
('VI', 'VIR', '850', u'Virgin Islands, U.S.'),
('WF', 'WLF', '876', u'Wallis and Futuna'),
('EH', 'ESH', '732', u'Western Sahara'),
('YE', 'YEM', '887', u'Yemen'),
('ZM', 'ZMB', '894', u'Zambia'),
('ZW', 'ZWE', '716', u'Zimbabwe')]
# Language table.
# Each entry is a 5-tuple. Judging from the data, the fields appear to be:
#   (alpha3 code, terminologic alpha3 code or '', alpha2 code or '',
#    English name, French name)
# TODO confirm the field order against the Language class.
# NOTE(review): the 'pob' / 'pb' (Brazilian Portuguese) entry looks like a
# project-specific addition, not a standard ISO-639 code -- verify.
LANGUAGES = [('aar', '', 'aa', u'Afar', u'afar'),
('abk', '', 'ab', u'Abkhazian', u'abkhaze'),
('ace', '', '', u'Achinese', u'aceh'),
('ach', '', '', u'Acoli', u'acoli'),
('ada', '', '', u'Adangme', u'adangme'),
('ady', '', '', u'Adyghe; Adygei', u'adyghé'),
('afa', '', '', u'Afro-Asiatic languages', u'afro-asiatiques, langues'),
('afh', '', '', u'Afrihili', u'afrihili'),
('afr', '', 'af', u'Afrikaans', u'afrikaans'),
('ain', '', '', u'Ainu', u'aïnou'),
('aka', '', 'ak', u'Akan', u'akan'),
('akk', '', '', u'Akkadian', u'akkadien'),
('alb', 'sqi', 'sq', u'Albanian', u'albanais'),
('ale', '', '', u'Aleut', u'aléoute'),
('alg', '', '', u'Algonquian languages', u'algonquines, langues'),
('alt', '', '', u'Southern Altai', u'altai du Sud'),
('amh', '', 'am', u'Amharic', u'amharique'),
('ang', '', '', u'English, Old (ca.450-1100)', u'anglo-saxon (ca.450-1100)'),
('anp', '', '', u'Angika', u'angika'),
('apa', '', '', u'Apache languages', u'apaches, langues'),
('ara', '', 'ar', u'Arabic', u'arabe'),
('arc', '', '', u'Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)', u'araméen d\'empire (700-300 BCE)'),
('arg', '', 'an', u'Aragonese', u'aragonais'),
('arm', 'hye', 'hy', u'Armenian', u'arménien'),
('arn', '', '', u'Mapudungun; Mapuche', u'mapudungun; mapuche; mapuce'),
('arp', '', '', u'Arapaho', u'arapaho'),
('art', '', '', u'Artificial languages', u'artificielles, langues'),
('arw', '', '', u'Arawak', u'arawak'),
('asm', '', 'as', u'Assamese', u'assamais'),
('ast', '', '', u'Asturian; Bable; Leonese; Asturleonese', u'asturien; bable; léonais; asturoléonais'),
('ath', '', '', u'Athapascan languages', u'athapascanes, langues'),
('aus', '', '', u'Australian languages', u'australiennes, langues'),
('ava', '', 'av', u'Avaric', u'avar'),
('ave', '', 'ae', u'Avestan', u'avestique'),
('awa', '', '', u'Awadhi', u'awadhi'),
('aym', '', 'ay', u'Aymara', u'aymara'),
('aze', '', 'az', u'Azerbaijani', u'azéri'),
('bad', '', '', u'Banda languages', u'banda, langues'),
('bai', '', '', u'Bamileke languages', u'bamiléké, langues'),
('bak', '', 'ba', u'Bashkir', u'bachkir'),
('bal', '', '', u'Baluchi', u'baloutchi'),
('bam', '', 'bm', u'Bambara', u'bambara'),
('ban', '', '', u'Balinese', u'balinais'),
('baq', 'eus', 'eu', u'Basque', u'basque'),
('bas', '', '', u'Basa', u'basa'),
('bat', '', '', u'Baltic languages', u'baltes, langues'),
('bej', '', '', u'Beja; Bedawiyet', u'bedja'),
('bel', '', 'be', u'Belarusian', u'biélorusse'),
('bem', '', '', u'Bemba', u'bemba'),
('ben', '', 'bn', u'Bengali', u'bengali'),
('ber', '', '', u'Berber languages', u'berbères, langues'),
('bho', '', '', u'Bhojpuri', u'bhojpuri'),
('bih', '', 'bh', u'Bihari languages', u'langues biharis'),
('bik', '', '', u'Bikol', u'bikol'),
('bin', '', '', u'Bini; Edo', u'bini; edo'),
('bis', '', 'bi', u'Bislama', u'bichlamar'),
('bla', '', '', u'Siksika', u'blackfoot'),
('bnt', '', '', u'Bantu (Other)', u'bantoues, autres langues'),
('bos', '', 'bs', u'Bosnian', u'bosniaque'),
('bra', '', '', u'Braj', u'braj'),
('bre', '', 'br', u'Breton', u'breton'),
('btk', '', '', u'Batak languages', u'batak, langues'),
('bua', '', '', u'Buriat', u'bouriate'),
('bug', '', '', u'Buginese', u'bugi'),
('bul', '', 'bg', u'Bulgarian', u'bulgare'),
('bur', 'mya', 'my', u'Burmese', u'birman'),
('byn', '', '', u'Blin; Bilin', u'blin; bilen'),
('cad', '', '', u'Caddo', u'caddo'),
('cai', '', '', u'Central American Indian languages', u'amérindiennes de L\'Amérique centrale, langues'),
('car', '', '', u'Galibi Carib', u'karib; galibi; carib'),
('cat', '', 'ca', u'Catalan; Valencian', u'catalan; valencien'),
('cau', '', '', u'Caucasian languages', u'caucasiennes, langues'),
('ceb', '', '', u'Cebuano', u'cebuano'),
('cel', '', '', u'Celtic languages', u'celtiques, langues; celtes, langues'),
('cha', '', 'ch', u'Chamorro', u'chamorro'),
('chb', '', '', u'Chibcha', u'chibcha'),
('che', '', 'ce', u'Chechen', u'tchétchène'),
('chg', '', '', u'Chagatai', u'djaghataï'),
('chi', 'zho', 'zh', u'Chinese', u'chinois'),
('chk', '', '', u'Chuukese', u'chuuk'),
('chm', '', '', u'Mari', u'mari'),
('chn', '', '', u'Chinook jargon', u'chinook, jargon'),
('cho', '', '', u'Choctaw', u'choctaw'),
('chp', '', '', u'Chipewyan; Dene Suline', u'chipewyan'),
('chr', '', '', u'Cherokee', u'cherokee'),
('chu', '', 'cu', u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic', u'slavon d\'église; vieux slave; slavon liturgique; vieux bulgare'),
('chv', '', 'cv', u'Chuvash', u'tchouvache'),
('chy', '', '', u'Cheyenne', u'cheyenne'),
('cmc', '', '', u'Chamic languages', u'chames, langues'),
('cop', '', '', u'Coptic', u'copte'),
('cor', '', 'kw', u'Cornish', u'cornique'),
('cos', '', 'co', u'Corsican', u'corse'),
('cpe', '', '', u'Creoles and pidgins, English based', u'créoles et pidgins basés sur l\'anglais'),
('cpf', '', '', u'Creoles and pidgins, French-based ', u'créoles et pidgins basés sur le français'),
('cpp', '', '', u'Creoles and pidgins, Portuguese-based ', u'créoles et pidgins basés sur le portugais'),
('cre', '', 'cr', u'Cree', u'cree'),
('crh', '', '', u'Crimean Tatar; Crimean Turkish', u'tatar de Crimé'),
('crp', '', '', u'Creoles and pidgins ', u'créoles et pidgins'),
('csb', '', '', u'Kashubian', u'kachoube'),
('cus', '', '', u'Cushitic languages', u'couchitiques, langues'),
('cze', 'ces', 'cs', u'Czech', u'tchèque'),
('dak', '', '', u'Dakota', u'dakota'),
('dan', '', 'da', u'Danish', u'danois'),
('dar', '', '', u'Dargwa', u'dargwa'),
('day', '', '', u'Land Dayak languages', u'dayak, langues'),
('del', '', '', u'Delaware', u'delaware'),
('den', '', '', u'Slave (Athapascan)', u'esclave (athapascan)'),
('dgr', '', '', u'Dogrib', u'dogrib'),
('din', '', '', u'Dinka', u'dinka'),
('div', '', 'dv', u'Divehi; Dhivehi; Maldivian', u'maldivien'),
('doi', '', '', u'Dogri', u'dogri'),
('dra', '', '', u'Dravidian languages', u'dravidiennes, langues'),
('dsb', '', '', u'Lower Sorbian', u'bas-sorabe'),
('dua', '', '', u'Duala', u'douala'),
('dum', '', '', u'Dutch, Middle (ca.1050-1350)', u'néerlandais moyen (ca. 1050-1350)'),
('dut', 'nld', 'nl', u'Dutch; Flemish', u'néerlandais; flamand'),
('dyu', '', '', u'Dyula', u'dioula'),
('dzo', '', 'dz', u'Dzongkha', u'dzongkha'),
('efi', '', '', u'Efik', u'efik'),
('egy', '', '', u'Egyptian (Ancient)', u'égyptien'),
('eka', '', '', u'Ekajuk', u'ekajuk'),
('elx', '', '', u'Elamite', u'élamite'),
('eng', '', 'en', u'English', u'anglais'),
('enm', '', '', u'English, Middle (1100-1500)', u'anglais moyen (1100-1500)'),
('epo', '', 'eo', u'Esperanto', u'espéranto'),
('est', '', 'et', u'Estonian', u'estonien'),
('ewe', '', 'ee', u'Ewe', u'éwé'),
('ewo', '', '', u'Ewondo', u'éwondo'),
('fan', '', '', u'Fang', u'fang'),
('fao', '', 'fo', u'Faroese', u'féroïen'),
('fat', '', '', u'Fanti', u'fanti'),
('fij', '', 'fj', u'Fijian', u'fidjien'),
('fil', '', '', u'Filipino; Pilipino', u'filipino; pilipino'),
('fin', '', 'fi', u'Finnish', u'finnois'),
('fiu', '', '', u'Finno-Ugrian languages', u'finno-ougriennes, langues'),
('fon', '', '', u'Fon', u'fon'),
('fre', 'fra', 'fr', u'French', u'français'),
('frm', '', '', u'French, Middle (ca.1400-1600)', u'français moyen (1400-1600)'),
('fro', '', '', u'French, Old (842-ca.1400)', u'français ancien (842-ca.1400)'),
('frr', '', '', u'Northern Frisian', u'frison septentrional'),
('frs', '', '', u'Eastern Frisian', u'frison oriental'),
('fry', '', 'fy', u'Western Frisian', u'frison occidental'),
('ful', '', 'ff', u'Fulah', u'peul'),
('fur', '', '', u'Friulian', u'frioulan'),
('gaa', '', '', u'Ga', u'ga'),
('gay', '', '', u'Gayo', u'gayo'),
('gba', '', '', u'Gbaya', u'gbaya'),
('gem', '', '', u'Germanic languages', u'germaniques, langues'),
('geo', 'kat', 'ka', u'Georgian', u'géorgien'),
('ger', 'deu', 'de', u'German', u'allemand'),
('gez', '', '', u'Geez', u'guèze'),
('gil', '', '', u'Gilbertese', u'kiribati'),
('gla', '', 'gd', u'Gaelic; Scottish Gaelic', u'gaélique; gaélique écossais'),
('gle', '', 'ga', u'Irish', u'irlandais'),
('glg', '', 'gl', u'Galician', u'galicien'),
('glv', '', 'gv', u'Manx', u'manx; mannois'),
('gmh', '', '', u'German, Middle High (ca.1050-1500)', u'allemand, moyen haut (ca. 1050-1500)'),
('goh', '', '', u'German, Old High (ca.750-1050)', u'allemand, vieux haut (ca. 750-1050)'),
('gon', '', '', u'Gondi', u'gond'),
('gor', '', '', u'Gorontalo', u'gorontalo'),
('got', '', '', u'Gothic', u'gothique'),
('grb', '', '', u'Grebo', u'grebo'),
('grc', '', '', u'Greek, Ancient (to 1453)', u'grec ancien (jusqu\'à 1453)'),
('gre', 'ell', 'el', u'Greek, Modern (1453-)', u'grec moderne (après 1453)'),
('grn', '', 'gn', u'Guarani', u'guarani'),
('gsw', '', '', u'Swiss German; Alemannic; Alsatian', u'suisse alémanique; alémanique; alsacien'),
('guj', '', 'gu', u'Gujarati', u'goudjrati'),
('gwi', '', '', u'Gwich\'in', u'gwich\'in'),
('hai', '', '', u'Haida', u'haida'),
('hat', '', 'ht', u'Haitian; Haitian Creole', u'haïtien; créole haïtien'),
('hau', '', 'ha', u'Hausa', u'haoussa'),
('haw', '', '', u'Hawaiian', u'hawaïen'),
('heb', '', 'he', u'Hebrew', u'hébreu'),
('her', '', 'hz', u'Herero', u'herero'),
('hil', '', '', u'Hiligaynon', u'hiligaynon'),
('him', '', '', u'Himachali languages; Western Pahari languages', u'langues himachalis; langues paharis occidentales'),
('hin', '', 'hi', u'Hindi', u'hindi'),
('hit', '', '', u'Hittite', u'hittite'),
('hmn', '', '', u'Hmong; Mong', u'hmong'),
('hmo', '', 'ho', u'Hiri Motu', u'hiri motu'),
('hrv', '', 'hr', u'Croatian', u'croate'),
('hsb', '', '', u'Upper Sorbian', u'haut-sorabe'),
('hun', '', 'hu', u'Hungarian', u'hongrois'),
('hup', '', '', u'Hupa', u'hupa'),
('iba', '', '', u'Iban', u'iban'),
('ibo', '', 'ig', u'Igbo', u'igbo'),
('ice', 'isl', 'is', u'Icelandic', u'islandais'),
('ido', '', 'io', u'Ido', u'ido'),
('iii', '', 'ii', u'Sichuan Yi; Nuosu', u'yi de Sichuan'),
('ijo', '', '', u'Ijo languages', u'ijo, langues'),
('iku', '', 'iu', u'Inuktitut', u'inuktitut'),
('ile', '', 'ie', u'Interlingue; Occidental', u'interlingue'),
('ilo', '', '', u'Iloko', u'ilocano'),
('ina', '', 'ia', u'Interlingua (International Auxiliary Language Association)', u'interlingua (langue auxiliaire internationale)'),
('inc', '', '', u'Indic languages', u'indo-aryennes, langues'),
('ind', '', 'id', u'Indonesian', u'indonésien'),
('ine', '', '', u'Indo-European languages', u'indo-européennes, langues'),
('inh', '', '', u'Ingush', u'ingouche'),
('ipk', '', 'ik', u'Inupiaq', u'inupiaq'),
('ira', '', '', u'Iranian languages', u'iraniennes, langues'),
('iro', '', '', u'Iroquoian languages', u'iroquoises, langues'),
('ita', '', 'it', u'Italian', u'italien'),
('jav', '', 'jv', u'Javanese', u'javanais'),
('jbo', '', '', u'Lojban', u'lojban'),
('jpn', '', 'ja', u'Japanese', u'japonais'),
('jpr', '', '', u'Judeo-Persian', u'judéo-persan'),
('jrb', '', '', u'Judeo-Arabic', u'judéo-arabe'),
('kaa', '', '', u'Kara-Kalpak', u'karakalpak'),
('kab', '', '', u'Kabyle', u'kabyle'),
('kac', '', '', u'Kachin; Jingpho', u'kachin; jingpho'),
('kal', '', 'kl', u'Kalaallisut; Greenlandic', u'groenlandais'),
('kam', '', '', u'Kamba', u'kamba'),
('kan', '', 'kn', u'Kannada', u'kannada'),
('kar', '', '', u'Karen languages', u'karen, langues'),
('kas', '', 'ks', u'Kashmiri', u'kashmiri'),
('kau', '', 'kr', u'Kanuri', u'kanouri'),
('kaw', '', '', u'Kawi', u'kawi'),
('kaz', '', 'kk', u'Kazakh', u'kazakh'),
('kbd', '', '', u'Kabardian', u'kabardien'),
('kha', '', '', u'Khasi', u'khasi'),
('khi', '', '', u'Khoisan languages', u'khoïsan, langues'),
('khm', '', 'km', u'Central Khmer', u'khmer central'),
('kho', '', '', u'Khotanese; Sakan', u'khotanais; sakan'),
('kik', '', 'ki', u'Kikuyu; Gikuyu', u'kikuyu'),
('kin', '', 'rw', u'Kinyarwanda', u'rwanda'),
('kir', '', 'ky', u'Kirghiz; Kyrgyz', u'kirghiz'),
('kmb', '', '', u'Kimbundu', u'kimbundu'),
('kok', '', '', u'Konkani', u'konkani'),
('kom', '', 'kv', u'Komi', u'kom'),
('kon', '', 'kg', u'Kongo', u'kongo'),
('kor', '', 'ko', u'Korean', u'coréen'),
('kos', '', '', u'Kosraean', u'kosrae'),
('kpe', '', '', u'Kpelle', u'kpellé'),
('krc', '', '', u'Karachay-Balkar', u'karatchai balkar'),
('krl', '', '', u'Karelian', u'carélien'),
('kro', '', '', u'Kru languages', u'krou, langues'),
('kru', '', '', u'Kurukh', u'kurukh'),
('kua', '', 'kj', u'Kuanyama; Kwanyama', u'kuanyama; kwanyama'),
('kum', '', '', u'Kumyk', u'koumyk'),
('kur', '', 'ku', u'Kurdish', u'kurde'),
('kut', '', '', u'Kutenai', u'kutenai'),
('lad', '', '', u'Ladino', u'judéo-espagnol'),
('lah', '', '', u'Lahnda', u'lahnda'),
('lam', '', '', u'Lamba', u'lamba'),
('lao', '', 'lo', u'Lao', u'lao'),
('lat', '', 'la', u'Latin', u'latin'),
('lav', '', 'lv', u'Latvian', u'letton'),
('lez', '', '', u'Lezghian', u'lezghien'),
('lim', '', 'li', u'Limburgan; Limburger; Limburgish', u'limbourgeois'),
('lin', '', 'ln', u'Lingala', u'lingala'),
('lit', '', 'lt', u'Lithuanian', u'lituanien'),
('lol', '', '', u'Mongo', u'mongo'),
('loz', '', '', u'Lozi', u'lozi'),
('ltz', '', 'lb', u'Luxembourgish; Letzeburgesch', u'luxembourgeois'),
('lua', '', '', u'Luba-Lulua', u'luba-lulua'),
('lub', '', 'lu', u'Luba-Katanga', u'luba-katanga'),
('lug', '', 'lg', u'Ganda', u'ganda'),
('lui', '', '', u'Luiseno', u'luiseno'),
('lun', '', '', u'Lunda', u'lunda'),
('luo', '', '', u'Luo (Kenya and Tanzania)', u'luo (Kenya et Tanzanie)'),
('lus', '', '', u'Lushai', u'lushai'),
('mac', 'mkd', 'mk', u'Macedonian', u'macédonien'),
('mad', '', '', u'Madurese', u'madourais'),
('mag', '', '', u'Magahi', u'magahi'),
('mah', '', 'mh', u'Marshallese', u'marshall'),
('mai', '', '', u'Maithili', u'maithili'),
('mak', '', '', u'Makasar', u'makassar'),
('mal', '', 'ml', u'Malayalam', u'malayalam'),
('man', '', '', u'Mandingo', u'mandingue'),
('mao', 'mri', 'mi', u'Maori', u'maori'),
('map', '', '', u'Austronesian languages', u'austronésiennes, langues'),
('mar', '', 'mr', u'Marathi', u'marathe'),
('mas', '', '', u'Masai', u'massaï'),
('may', 'msa', 'ms', u'Malay', u'malais'),
('mdf', '', '', u'Moksha', u'moksa'),
('mdr', '', '', u'Mandar', u'mandar'),
('men', '', '', u'Mende', u'mendé'),
('mga', '', '', u'Irish, Middle (900-1200)', u'irlandais moyen (900-1200)'),
('mic', '', '', u'Mi\'kmaq; Micmac', u'mi\'kmaq; micmac'),
('min', '', '', u'Minangkabau', u'minangkabau'),
('mkh', '', '', u'Mon-Khmer languages', u'môn-khmer, langues'),
('mlg', '', 'mg', u'Malagasy', u'malgache'),
('mlt', '', 'mt', u'Maltese', u'maltais'),
('mnc', '', '', u'Manchu', u'mandchou'),
('mni', '', '', u'Manipuri', u'manipuri'),
('mno', '', '', u'Manobo languages', u'manobo, langues'),
('moh', '', '', u'Mohawk', u'mohawk'),
('mon', '', 'mn', u'Mongolian', u'mongol'),
('mos', '', '', u'Mossi', u'moré'),
('mun', '', '', u'Munda languages', u'mounda, langues'),
('mus', '', '', u'Creek', u'muskogee'),
('mwl', '', '', u'Mirandese', u'mirandais'),
('mwr', '', '', u'Marwari', u'marvari'),
('myn', '', '', u'Mayan languages', u'maya, langues'),
('myv', '', '', u'Erzya', u'erza'),
('nah', '', '', u'Nahuatl languages', u'nahuatl, langues'),
('nai', '', '', u'North American Indian languages', u'nord-amérindiennes, langues'),
('nap', '', '', u'Neapolitan', u'napolitain'),
('nau', '', 'na', u'Nauru', u'nauruan'),
('nav', '', 'nv', u'Navajo; Navaho', u'navaho'),
('nbl', '', 'nr', u'Ndebele, South; South Ndebele', u'ndébélé du Sud'),
('nde', '', 'nd', u'Ndebele, North; North Ndebele', u'ndébélé du Nord'),
('ndo', '', 'ng', u'Ndonga', u'ndonga'),
('nds', '', '', u'Low German; Low Saxon; German, Low; Saxon, Low', u'bas allemand; bas saxon; allemand, bas; saxon, bas'),
('nep', '', 'ne', u'Nepali', u'népalais'),
('new', '', '', u'Nepal Bhasa; Newari', u'nepal bhasa; newari'),
('nia', '', '', u'Nias', u'nias'),
('nic', '', '', u'Niger-Kordofanian languages', u'nigéro-kordofaniennes, langues'),
('niu', '', '', u'Niuean', u'niué'),
('nno', '', 'nn', u'Norwegian Nynorsk; Nynorsk, Norwegian', u'norvégien nynorsk; nynorsk, norvégien'),
('nob', '', 'nb', u'Bokmål, Norwegian; Norwegian Bokmål', u'norvégien bokmål'),
('nog', '', '', u'Nogai', u'nogaï; nogay'),
('non', '', '', u'Norse, Old', u'norrois, vieux'),
('nor', '', 'no', u'Norwegian', u'norvégien'),
('nqo', '', '', u'N\'Ko', u'n\'ko'),
('nso', '', '', u'Pedi; Sepedi; Northern Sotho', u'pedi; sepedi; sotho du Nord'),
('nub', '', '', u'Nubian languages', u'nubiennes, langues'),
('nwc', '', '', u'Classical Newari; Old Newari; Classical Nepal Bhasa', u'newari classique'),
('nya', '', 'ny', u'Chichewa; Chewa; Nyanja', u'chichewa; chewa; nyanja'),
('nym', '', '', u'Nyamwezi', u'nyamwezi'),
('nyn', '', '', u'Nyankole', u'nyankolé'),
('nyo', '', '', u'Nyoro', u'nyoro'),
('nzi', '', '', u'Nzima', u'nzema'),
('oci', '', 'oc', u'Occitan (post 1500); Provençal', u'occitan (après 1500); provençal'),
('oji', '', 'oj', u'Ojibwa', u'ojibwa'),
('ori', '', 'or', u'Oriya', u'oriya'),
('orm', '', 'om', u'Oromo', u'galla'),
('osa', '', '', u'Osage', u'osage'),
('oss', '', 'os', u'Ossetian; Ossetic', u'ossète'),
('ota', '', '', u'Turkish, Ottoman (1500-1928)', u'turc ottoman (1500-1928)'),
('oto', '', '', u'Otomian languages', u'otomi, langues'),
('paa', '', '', u'Papuan languages', u'papoues, langues'),
('pag', '', '', u'Pangasinan', u'pangasinan'),
('pal', '', '', u'Pahlavi', u'pahlavi'),
('pam', '', '', u'Pampanga; Kapampangan', u'pampangan'),
('pan', '', 'pa', u'Panjabi; Punjabi', u'pendjabi'),
('pap', '', '', u'Papiamento', u'papiamento'),
('pau', '', '', u'Palauan', u'palau'),
('peo', '', '', u'Persian, Old (ca.600-400 B.C.)', u'perse, vieux (ca. 600-400 av. J.-C.)'),
('per', 'fas', 'fa', u'Persian', u'persan'),
('phi', '', '', u'Philippine languages', u'philippines, langues'),
('phn', '', '', u'Phoenician', u'phénicien'),
('pli', '', 'pi', u'Pali', u'pali'),
('pol', '', 'pl', u'Polish', u'polonais'),
('pon', '', '', u'Pohnpeian', u'pohnpei'),
('pob', '', 'pb', u'Brazilian Portuguese', u'brazilian portuguese'),
('por', '', 'pt', u'Portuguese', u'portugais'),
('pra', '', '', u'Prakrit languages', u'prâkrit, langues'),
('pro', '', '', u'Provençal, Old (to 1500)', u'provençal ancien (jusqu\'à 1500)'),
('pus', '', 'ps', u'Pushto; Pashto', u'pachto'),
('que', '', 'qu', u'Quechua', u'quechua'),
('raj', '', '', u'Rajasthani', u'rajasthani'),
('rap', '', '', u'Rapanui', u'rapanui'),
('rar', '', '', u'Rarotongan; Cook Islands Maori', u'rarotonga; maori des îles Cook'),
('roa', '', '', u'Romance languages', u'romanes, langues'),
('roh', '', 'rm', u'Romansh', u'romanche'),
('rom', '', '', u'Romany', u'tsigane'),
('rum', 'ron', 'ro', u'Romanian; Moldavian; Moldovan', u'roumain; moldave'),
('run', '', 'rn', u'Rundi', u'rundi'),
('rup', '', '', u'Aromanian; Arumanian; Macedo-Romanian', u'aroumain; macédo-roumain'),
('rus', '', 'ru', u'Russian', u'russe'),
('sad', '', '', u'Sandawe', u'sandawe'),
('sag', '', 'sg', u'Sango', u'sango'),
('sah', '', '', u'Yakut', u'iakoute'),
('sai', '', '', u'South American Indian (Other)', u'indiennes d\'Amérique du Sud, autres langues'),
('sal', '', '', u'Salishan languages', u'salishennes, langues'),
('sam', '', '', u'Samaritan Aramaic', u'samaritain'),
('san', '', 'sa', u'Sanskrit', u'sanskrit'),
('sas', '', '', u'Sasak', u'sasak'),
('sat', '', '', u'Santali', u'santal'),
('scn', '', '', u'Sicilian', u'sicilien'),
('sco', '', '', u'Scots', u'écossais'),
('sel', '', '', u'Selkup', u'selkoupe'),
('sem', '', '', u'Semitic languages', u'sémitiques, langues'),
('sga', '', '', u'Irish, Old (to 900)', u'irlandais ancien (jusqu\'à 900)'),
('sgn', '', '', u'Sign Languages', u'langues des signes'),
('shn', '', '', u'Shan', u'chan'),
('sid', '', '', u'Sidamo', u'sidamo'),
('sin', '', 'si', u'Sinhala; Sinhalese', u'singhalais'),
('sio', '', '', u'Siouan languages', u'sioux, langues'),
('sit', '', '', u'Sino-Tibetan languages', u'sino-tibétaines, langues'),
('sla', '', '', u'Slavic languages', u'slaves, langues'),
('slo', 'slk', 'sk', u'Slovak', u'slovaque'),
('slv', '', 'sl', u'Slovenian', u'slovène'),
('sma', '', '', u'Southern Sami', u'sami du Sud'),
('sme', '', 'se', u'Northern Sami', u'sami du Nord'),
('smi', '', '', u'Sami languages', u'sames, langues'),
('smj', '', '', u'Lule Sami', u'sami de Lule'),
('smn', '', '', u'Inari Sami', u'sami d\'Inari'),
('smo', '', 'sm', u'Samoan', u'samoan'),
('sms', '', '', u'Skolt Sami', u'sami skolt'),
('sna', '', 'sn', u'Shona', u'shona'),
('snd', '', 'sd', u'Sindhi', u'sindhi'),
('snk', '', '', u'Soninke', u'soninké'),
('sog', '', '', u'Sogdian', u'sogdien'),
('som', '', 'so', u'Somali', u'somali'),
('son', '', '', u'Songhai languages', u'songhai, langues'),
('sot', '', 'st', u'Sotho, Southern', u'sotho du Sud'),
('spa', '', 'es', u'Spanish; Castilian', u'espagnol; castillan'),
('srd', '', 'sc', u'Sardinian', u'sarde'),
('srn', '', '', u'Sranan Tongo', u'sranan tongo'),
('srp', '', 'sr', u'Serbian', u'serbe'),
('srr', '', '', u'Serer', u'sérère'),
('ssa', '', '', u'Nilo-Saharan languages', u'nilo-sahariennes, langues'),
('ssw', '', 'ss', u'Swati', u'swati'),
('suk', '', '', u'Sukuma', u'sukuma'),
('sun', '', 'su', u'Sundanese', u'soundanais'),
('sus', '', '', u'Susu', u'soussou'),
('sux', '', '', u'Sumerian', u'sumérien'),
('swa', '', 'sw', u'Swahili', u'swahili'),
('swe', '', 'sv', u'Swedish', u'suédois'),
('syc', '', '', u'Classical Syriac', u'syriaque classique'),
('syr', '', '', u'Syriac', u'syriaque'),
('tah', '', 'ty', u'Tahitian', u'tahitien'),
('tai', '', '', u'Tai languages', u'tai, langues'),
('tam', '', 'ta', u'Tamil', u'tamoul'),
('tat', '', 'tt', u'Tatar', u'tatar'),
('tel', '', 'te', u'Telugu', u'télougou'),
('tem', '', '', u'Timne', u'temne'),
('ter', '', '', u'Tereno', u'tereno'),
('tet', '', '', u'Tetum', u'tetum'),
('tgk', '', 'tg', u'Tajik', u'tadjik'),
('tgl', '', 'tl', u'Tagalog', u'tagalog'),
('tha', '', 'th', u'Thai', u'thaï'),
('tib', 'bod', 'bo', u'Tibetan', u'tibétain'),
('tig', '', '', u'Tigre', u'tigré'),
('tir', '', 'ti', u'Tigrinya', u'tigrigna'),
('tiv', '', '', u'Tiv', u'tiv'),
('tkl', '', '', u'Tokelau', u'tokelau'),
('tlh', '', '', u'Klingon; tlhIngan-Hol', u'klingon'),
('tli', '', '', u'Tlingit', u'tlingit'),
('tmh', '', '', u'Tamashek', u'tamacheq'),
('tog', '', '', u'Tonga (Nyasa)', u'tonga (Nyasa)'),
('ton', '', 'to', u'Tonga (Tonga Islands)', u'tongan (Îles Tonga)'),
('tpi', '', '', u'Tok Pisin', u'tok pisin'),
('tsi', '', '', u'Tsimshian', u'tsimshian'),
('tsn', '', 'tn', u'Tswana', u'tswana'),
('tso', '', 'ts', u'Tsonga', u'tsonga'),
('tuk', '', 'tk', u'Turkmen', u'turkmène'),
('tum', '', '', u'Tumbuka', u'tumbuka'),
('tup', '', '', u'Tupi languages', u'tupi, langues'),
('tur', '', 'tr', u'Turkish', u'turc'),
('tut', '', '', u'Altaic languages', u'altaïques, langues'),
('tvl', '', '', u'Tuvalu', u'tuvalu'),
('twi', '', 'tw', u'Twi', u'twi'),
('tyv', '', '', u'Tuvinian', u'touva'),
('udm', '', '', u'Udmurt', u'oudmourte'),
('uga', '', '', u'Ugaritic', u'ougaritique'),
('uig', '', 'ug', u'Uighur; Uyghur', u'ouïgour'),
('ukr', '', 'uk', u'Ukrainian', u'ukrainien'),
('umb', '', '', u'Umbundu', u'umbundu'),
('und', '', '', u'Undetermined', u'indéterminée'),
('urd', '', 'ur', u'Urdu', u'ourdou'),
('uzb', '', 'uz', u'Uzbek', u'ouszbek'),
('vai', '', '', u'Vai', u'vaï'),
('ven', '', 've', u'Venda', u'venda'),
('vie', '', 'vi', u'Vietnamese', u'vietnamien'),
('vol', '', 'vo', u'Volapük', u'volapük'),
('vot', '', '', u'Votic', u'vote'),
('wak', '', '', u'Wakashan languages', u'wakashanes, langues'),
('wal', '', '', u'Walamo', u'walamo'),
('war', '', '', u'Waray', u'waray'),
('was', '', '', u'Washo', u'washo'),
('wel', 'cym', 'cy', u'Welsh', u'gallois'),
('wen', '', '', u'Sorbian languages', u'sorabes, langues'),
('wln', '', 'wa', u'Walloon', u'wallon'),
('wol', '', 'wo', u'Wolof', u'wolof'),
('xal', '', '', u'Kalmyk; Oirat', u'kalmouk; oïrat'),
('xho', '', 'xh', u'Xhosa', u'xhosa'),
('yao', '', '', u'Yao', u'yao'),
('yap', '', '', u'Yapese', u'yapois'),
('yid', '', 'yi', u'Yiddish', u'yiddish'),
('yor', '', 'yo', u'Yoruba', u'yoruba'),
('ypk', '', '', u'Yupik languages', u'yupik, langues'),
('zap', '', '', u'Zapotec', u'zapotèque'),
('zbl', '', '', u'Blissymbols; Blissymbolics; Bliss', u'symboles Bliss; Bliss'),
('zen', '', '', u'Zenaga', u'zenaga'),
('zha', '', 'za', u'Zhuang; Chuang', u'zhuang; chuang'),
('znd', '', '', u'Zande languages', u'zandé, langues'),
('zul', '', 'zu', u'Zulu', u'zoulou'),
('zun', '', '', u'Zuni', u'zuni'),
('zza', '', '', u'Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki', u'zaza; dimili; dimli; kirdki; kirmanjki; zazaki')]
class Country(object):
    """Country according to ISO-3166

    The lookup strategy depends on the shape of the (stripped, lower-cased) input:

    * two characters: matched against the alpha2 codes
    * three non-digit characters: matched against the alpha3 codes
    * three digits: matched against the numeric codes
    * anything else, or a failed code lookup: matched against the country names

    :param string country: country name, alpha2 code, alpha3 code or numeric code
    :param list countries: all countries, defaults to :data:`~subliminal.language.COUNTRIES`
    :type countries: see :data:`~subliminal.language.COUNTRIES`
    :raise ValueError: if no entry of `countries` matches `country`

    """
    def __init__(self, country, countries=None):
        table = countries or COUNTRIES
        wanted = to_unicode(country.strip().lower())

        def find(column):
            # Index the table on the lower-cased value of the given column;
            # when two rows share a key the later row is the one returned
            return dict((row[column].lower(), row) for row in table).get(wanted)

        # Pick the code column that fits the input, if any
        if len(wanted) == 2:
            code_column = 0
        elif len(wanted) == 3:
            code_column = 2 if wanted.isdigit() else 1
        else:
            code_column = None

        found = None
        if code_column is not None:
            found = find(code_column)

        # Fall back on the country name
        if found is None:
            found = find(3)

        # Nothing matched at all
        if found is None:
            raise ValueError('Country %s does not exist' % wanted)

        # (alpha2, alpha3, numeric, name)
        self.alpha2, self.alpha3, self.numeric, self.name = found[0], found[1], found[2], found[3]

    def __hash__(self):
        # Consistent with __eq__: identity is carried by the alpha3 code
        return hash(self.alpha3)

    def __eq__(self, other):
        return isinstance(other, Country) and self.alpha3 == other.alpha3

    def __ne__(self, other):
        return not (self == other)

    def __unicode__(self):
        return self.name

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        return 'Country(%s)' % (self,)
class Language(object):
    """Language according to ISO-639

    :param string language: language name (english or french), alpha2 code, alpha3 code or terminologic code,
        optionally with a country written as ``language-country``, ``language_country`` or ``language (country)``
    :param country: country of the language. When given, `language` is not parsed for an embedded country
    :type country: :class:`Country` or string
    :param languages: all languages, defaults to :data:`~subliminal.language.LANGUAGES`
    :type languages: see :data:`~subliminal.language.LANGUAGES`
    :param countries: all countries, defaults to :data:`~subliminal.language.COUNTRIES`
    :type countries: see :data:`~subliminal.language.COUNTRIES`
    :param bool strict: whether to raise a ValueError on unknown language or country or not. When not strict,
        an unknown country is dropped (with a logged warning) and an unknown language falls back on the
        ``und`` entry of `languages`
    :raise ValueError: if `strict` and the language or the country cannot be identified

    :class:`Language` implements the inclusion test, with the ``in`` keyword::

        >>> Language('pt-BR') in Language('pt')  # Portuguese (Brazil) is included in Portuguese
        True
        >>> Language('pt') in Language('pt-BR')  # Portuguese is not included in Portuguese (Brazil)
        False

    """
    # "<language>(<country>)" first, then "<language>-<country>" / "<language>_<country>"
    with_country_regexps = [re.compile(r'(.*)\((.*)\)'), re.compile(r'(.*)[-_](.*)')]

    def __init__(self, language, country=None, languages=None, countries=None, strict=True):
        languages = languages or LANGUAGES
        countries = countries or COUNTRIES

        # Get the country
        self.country = None
        if isinstance(country, Country):
            self.country = country
        elif isinstance(country, basestring):
            try:
                self.country = Country(country, countries)
            except ValueError:
                logger.warning(u'Country %s could not be identified', country)
                if strict:
                    raise

        # Language + Country format
        #TODO: Improve this part
        if country is None:
            for pattern in self.with_country_regexps:
                match = pattern.match(language)
                if not match:
                    continue
                # Only the first matching pattern is used
                language = match.group(1)
                embedded_country = match.group(2)
                try:
                    self.country = Country(embedded_country, countries)
                except ValueError:
                    # Report the fragment that failed to resolve; the `country`
                    # argument is always None in this branch
                    logger.warning(u'Country %s could not be identified', embedded_country)
                    if strict:
                        raise
                break

        # Try to find the language: codes first (picked by length), then names
        language = to_unicode(language.strip().lower())
        language_tuple = None
        if len(language) == 2:
            language_tuple = dict((l[2].lower(), l) for l in languages).get(language)
        elif len(language) == 3:
            language_tuple = dict((l[0].lower(), l) for l in languages).get(language)
            if language_tuple is None:
                language_tuple = dict((l[1].lower(), l) for l in languages).get(language)
        # Names may hold several '; '-separated alternatives, only the first one is searched
        if language_tuple is None:
            language_tuple = dict((l[3].split('; ')[0].lower(), l) for l in languages).get(language)
        if language_tuple is None:
            language_tuple = dict((l[4].split('; ')[0].lower(), l) for l in languages).get(language)

        # Raise ValueError if strict or continue with Undetermined
        if language_tuple is None:
            if strict:
                raise ValueError('Language %s does not exist' % language)
            language_tuple = dict((l[0].lower(), l) for l in languages).get('und')

        # Set attributes, tuples are (alpha3, terminologic, alpha2, name, french name)
        self.alpha2 = language_tuple[2]
        self.alpha3 = language_tuple[0]
        self.terminologic = language_tuple[1]
        self.name = language_tuple[3]
        self.french_name = language_tuple[4]

    def __hash__(self):
        # Consistent with __eq__: alpha3 code plus the country's alpha3 code, if any
        if self.country is None:
            return hash(self.alpha3)
        return hash(self.alpha3 + self.country.alpha3)

    def __eq__(self, other):
        if isinstance(other, Language):
            return self.alpha3 == other.alpha3 and self.country == other.country
        return False

    def __contains__(self, item):
        # A language without country includes every country variant of itself,
        # a language with a country only includes itself
        if isinstance(item, Language):
            if self == item:
                return True
            if self.country is None:
                return self.alpha3 == item.alpha3
        return False

    def __ne__(self, other):
        return not self == other

    def __nonzero__(self):
        # The Undetermined language is falsy
        return self.alpha3 != 'und'

    def __unicode__(self):
        if self.country is None:
            return self.name
        return '%s (%s)' % (self.name, self.country)

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        if self.country is None:
            return 'Language(%s)' % self.name.encode('utf-8')
        return 'Language(%s, country=%s)' % (self.name.encode('utf-8'), self.country)
class language_set(set):
    """Set of :class:`Language` whose operations follow the inclusion logic of :class:`Language`.

    Elements of `iterable` that are not already :class:`Language` instances are converted:
    a tuple contributes its first member, anything else is passed as is to :class:`Language`.

    :param iterable: where to take elements from
    :type iterable: iterable of :class:`Languages <Language>` or string
    :param languages: all languages, defaults to :data:`~subliminal.language.LANGUAGES`
    :type languages: see :data:`~subliminal.language.LANGUAGES`
    :param bool strict: whether to raise a ValueError on invalid language or not

    Redefined operations:

        * Inclusion test, with the ``in`` keyword
        * Intersection
        * Subtraction

    Examples::

        >>> Language('en') in language_set(['en-US', 'en-CA'])
        False
        >>> Language('en-US') in language_set(['en', 'fr'])
        True
        >>> language_set(['en']) & language_set(['en-US', 'en-CA'])
        language_set([Language(English, country=Canada), Language(English, country=United States)])
        >>> language_set(['en-US', 'en-CA', 'fr']) - language_set(['en'])
        language_set([Language(French)])

    """
    def __init__(self, iterable=None, languages=None, strict=True):
        iterable = iterable or []
        languages = languages or LANGUAGES
        converted = []
        for element in iterable:
            if not isinstance(element, Language):
                # A tuple is identified by its first member
                identifier = element[0] if isinstance(element, tuple) else element
                element = Language(identifier, languages=languages, strict=strict)
            converted.append(element)
        super(language_set, self).__init__(converted)

    def __contains__(self, item):
        # True as soon as one member includes the item, plain set membership otherwise
        if any(item in member for member in self):
            return True
        return super(language_set, self).__contains__(item)

    def __and__(self, other):
        # Keep, from either side, every element included in some element of the opposite side
        common = language_set()
        for candidates, containers in ((self, other), (other, self)):
            for candidate in candidates:
                for container in containers:
                    if candidate in container:
                        common.add(candidate)
        return common

    def __sub__(self, other):
        # Keep the elements of self that `other` does not include
        remaining = language_set()
        remaining.update(member for member in self if member not in other)
        return remaining
class language_list(list):
    """List of :class:`Language` whose operations follow the inclusion logic of :class:`Language`.

    Elements of `iterable` that are not already :class:`Language` instances are converted:
    a tuple contributes its first member, anything else is passed as is to :class:`Language`.

    :param iterable: where to take elements from
    :type iterable: iterable of :class:`Languages <Language>` or string
    :param languages: all languages, defaults to :data:`~subliminal.language.LANGUAGES`
    :type languages: see :data:`~subliminal.language.LANGUAGES`
    :param bool strict: whether to raise a ValueError on invalid language or not

    Redefined operations:

        * Inclusion test, with the ``in`` keyword
        * Index

    Examples::

        >>> Language('en') in language_list(['en-US', 'en-CA'])
        False
        >>> Language('en-US') in language_list(['en', 'fr-BE'])
        True
        >>> language_list(['en', 'fr-BE']).index(Language('en-US'))
        0

    """
    def __init__(self, iterable=None, languages=None, strict=True):
        iterable = iterable or []
        languages = languages or LANGUAGES
        converted = []
        for element in iterable:
            if not isinstance(element, Language):
                # A tuple is identified by its first member
                identifier = element[0] if isinstance(element, tuple) else element
                element = Language(identifier, languages=languages, strict=strict)
            converted.append(element)
        super(language_list, self).__init__(converted)

    def __contains__(self, item):
        # True as soon as one member includes the item, plain list membership otherwise
        if any(item in member for member in self):
            return True
        return super(language_list, self).__contains__(item)

    def index(self, x, strict=False):
        """Position of the first member that includes `x`

        :param x: element to look for
        :param bool strict: whether to skip the inclusion logic and only use the regular list lookup
        :return: position of the first match
        :raise ValueError: if the regular list lookup finds nothing

        """
        if not strict:
            for position, member in enumerate(self):
                if x in member:
                    return position
        # Strict mode, or no member includes x
        return super(language_list, self).index(x)