@@ -468,6 +468,30 @@ def normalize(localename):
468468
469469 return localename
470470
471+ def _conv_to_windows (locale ):
472+ locale = locale .replace ('_' , '-' )
473+ if '@' in locale :
474+ locale , modifier = locale .split ('@' , 1 )
475+ locale , _ , encoding = locale .partition ('.' )
476+ locale , _ , territory = locale .partition ('-' )
477+ suffix = ''
478+ modifier = modifier .lower ()
479+ if modifier == 'valencia' :
480+ suffix = '-' + modifier
481+ elif modifier :
482+ if modifier in _modifier_to_script :
483+ modifier = _modifier_to_script [modifier ]
484+ else :
485+ modifier = modifier .title ()
486+ locale += '-' + modifier
487+ if territory :
488+ locale += '-' + territory
489+ if suffix :
490+ locale += suffix
491+ if encoding :
492+ locale += '.' + encoding
493+ return locale
494+
471495def _parse_localename (localename ):
472496
473497 """ Parses the locale code for localename and returns the
@@ -621,6 +645,8 @@ def setlocale(category, locale=None):
621645 if locale and not isinstance (locale , _builtin_str ):
622646 # convert to string
623647 locale = normalize (_build_localename (locale ))
648+ if os .name == 'nt' :
649+ locale = _conv_to_windows (locale )
624650 return _setlocale (category , locale )
625651
626652
@@ -1546,9 +1572,9 @@ def getpreferredencoding(do_setlocale=True):
15461572 0x004d : "as" , # Assamese
15471573 0x044d : "as_IN" , # Assamese - India
15481574 0x002c : "az" , # Azerbaijani (Latin)
1549- 0x742c : "az" , # Azerbaijani (Cyrillic)
1550- 0x782c : "az" , # Azerbaijani (Latin)
1551- 0x042c : "az_AZ" , # Azerbaijani (Latin) - Azerbaijan
1575+ 0x742c : "az@cyrillic " , # Azerbaijani (Cyrillic)
1576+ 0x782c : "az@latin " , # Azerbaijani (Latin)
1577+ 0x042c : "az_AZ@latin " , # Azerbaijani (Latin) - Azerbaijan
15521578 0x0045 : "bn" , # Bangla
15531579 0x0445 : "bn_IN" , # Bangla - India
15541580 0x0845 : "bn_BD" , # Bangla - Bangladesh
@@ -1558,10 +1584,10 @@ def getpreferredencoding(do_setlocale=True):
15581584 0x042d : "eu_ES" , # Basque - Spain
15591585 0x0023 : "be" , # Belarusian
15601586 0x0423 : "be_BY" , # Belarusian - Belarus
1561- 0x641a : "bs" , # Bosnian (Cyrillic)
1562- 0x681a : "bs" , # Bosnian (Latin)
1563- 0x141a : "bs_BA" , # Bosnian (Latin) - Bosnia and Herzegovina
1564- 0x201a : "bs_BA" , # Bosnian (Cyrillic) - Bosnia and Herzegovina
1587+ 0x641a : "bs@cyrillic " , # Bosnian (Cyrillic)
1588+ 0x681a : "bs@latin " , # Bosnian (Latin)
1589+ 0x141a : "bs_BA@latin " , # Bosnian (Latin) - Bosnia and Herzegovina
1590+ 0x201a : "bs_BA@cyrillic " , # Bosnian (Cyrillic) - Bosnia and Herzegovina
15651591 0x781a : "bs" , # Bosnian (Latin)
15661592 0x007e : "br" , # Breton
15671593 0x047e : "br_FR" , # Breton - France
@@ -1571,16 +1597,16 @@ def getpreferredencoding(do_setlocale=True):
15711597 0x0455 : "my_MM" , # Burmese - Myanmar
15721598 0x0003 : "ca" , # Catalan
15731599 0x0403 : "ca_ES" , # Catalan - Spain
1574- 0x0803 : "ca_ES" , # Valencian - Spain
1600+ 0x0803 : "ca_ES@valencia " , # Valencian - Spain
15751601 0x0092 : "ku" , # Central Kurdish
1576- 0x7c92 : "ku" , # Central Kurdish
1577- 0x0492 : "ku_IQ" , # Central Kurdish - Iraq
1602+ 0x7c92 : "ku@arabic " , # Central Kurdish
1603+ 0x0492 : "ku_IQ@arabic " , # Central Kurdish - Iraq
15781604 0x005c : "chr" , # Cherokee
1579- 0x7c5c : "chr" , # Cherokee
1580- 0x045c : "chr_US" , # Cherokee - United States
1581- 0x0004 : "zh" , # Chinese (Simplified)
1605+ 0x7c5c : "chr@Cher " , # Cherokee
1606+ 0x045c : "chr_US@Cher " , # Cherokee - United States
1607+ 0x0004 : "zh@Hans " , # Chinese (Simplified)
15821608 0x7804 : "zh" , # Chinese (Simplified)
1583- 0x7c04 : "zh" , # Chinese (Traditional)
1609+ 0x7c04 : "zh@Hant " , # Chinese (Traditional)
15841610 0x0404 : "zh_TW" , # Chinese (Traditional) - Taiwan
15851611 0x0804 : "zh_CN" , # Chinese (Simplified) - People's Republic of China
15861612 0x0c04 : "zh_HK" , # Chinese (Traditional) - Hong Kong S.A.R.
@@ -1648,9 +1674,9 @@ def getpreferredencoding(do_setlocale=True):
16481674 0x0062 : "fy" , # Frisian
16491675 0x0462 : "fy_NL" , # Frisian - Netherlands
16501676 0x0067 : "ff" , # Fulah
1651- 0x7c67 : "ff" , # Fulah (Latin)
1652- 0x0467 : "ff_NG" ,
1653- 0x0867 : "ff_SN" , # Fulah - Senegal
1677+ 0x7c67 : "ff@latin " , # Fulah (Latin)
1678+ 0x0467 : "ff_NG@latin " ,
1679+ 0x0867 : "ff_SN@latin " , # Fulah - Senegal
16541680 0x0056 : "gl" , # Galician
16551681 0x0456 : "gl_ES" , # Galician - Spain
16561682 0x0037 : "ka" , # Georgian
@@ -1670,8 +1696,8 @@ def getpreferredencoding(do_setlocale=True):
16701696 0x0047 : "gu" , # Gujarati
16711697 0x0447 : "gu_IN" , # Gujarati - India
16721698 0x0068 : "ha" , # Hausa (Latin)
1673- 0x7c68 : "ha" , # Hausa (Latin)
1674- 0x0468 : "ha_NG" , # Hausa (Latin) - Nigeria
1699+ 0x7c68 : "ha@latin " , # Hausa (Latin)
1700+ 0x0468 : "ha_NG@latin " , # Hausa (Latin) - Nigeria
16751701 0x0075 : "haw" , # Hawaiian
16761702 0x0475 : "haw_US" , # Hawaiian - United States
16771703 0x000d : "he" , # Hebrew
@@ -1687,10 +1713,10 @@ def getpreferredencoding(do_setlocale=True):
16871713 0x0021 : "id" , # Indonesian
16881714 0x0421 : "id_ID" , # Indonesian - Indonesia
16891715 0x005d : "iu" , # Inuktitut (Latin)
1690- 0x785d : "iu" , # Inuktitut (Syllabics)
1691- 0x7c5d : "iu" , # Inuktitut (Latin)
1692- 0x045d : "iu_CA" , # Inuktitut (Syllabics) - Canada
1693- 0x085d : "iu_CA" , # Inuktitut (Latin) - Canada
1716+ 0x785d : "iu@Cans " , # Inuktitut (Syllabics)
1717+ 0x7c5d : "iu@latin " , # Inuktitut (Latin)
1718+ 0x045d : "iu_CA@Cans " , # Inuktitut (Syllabics) - Canada
1719+ 0x085d : "iu_CA@latin " , # Inuktitut (Latin) - Canada
16941720 0x003c : "ga" , # Irish
16951721 0x083c : "ga_IE" , # Irish - Ireland
16961722 0x0010 : "it" , # Italian
@@ -1700,10 +1726,10 @@ def getpreferredencoding(do_setlocale=True):
17001726 0x0411 : "ja_JP" , # Japanese - Japan
17011727 0x004b : "kn" , # Kannada
17021728 0x044b : "kn_IN" , # Kannada - India
1703- 0x0471 : "kr_NG" , # Kanuri (Latin) - Nigeria
1729+ 0x0471 : "kr_NG@latin " , # Kanuri (Latin) - Nigeria
17041730 0x0060 : "ks" , # Kashmiri
1705- 0x0460 : "ks" , # Kashmiri - Perso_Arabic
1706- 0x0860 : "ks_IN" , # Kashmiri (Devanagari) - India
1731+ 0x0460 : "ks@arabic " , # Kashmiri - Perso_Arabic
1732+ 0x0860 : "ks_IN@devanagari " , # Kashmiri (Devanagari) - India
17071733 0x003f : "kk" , # Kazakh
17081734 0x043f : "kk_KZ" , # Kazakh - Kazakhstan
17091735 0x0053 : "km" , # Khmer
@@ -1747,10 +1773,10 @@ def getpreferredencoding(do_setlocale=True):
17471773 0x007c : "moh" , # Mohawk
17481774 0x047c : "moh_CA" , # Mohawk - Canada
17491775 0x0050 : "mn" , # Mongolian (Cyrillic)
1750- 0x7850 : "mn" , # Mongolian (Cyrillic)
1751- 0x7c50 : "mn" , # Mongolian (Traditional Mongolian)
1776+ 0x7850 : "mn@cyrillic " , # Mongolian (Cyrillic)
1777+ 0x7c50 : "mn@Mong " , # Mongolian (Traditional Mongolian)
17521778 0x0450 : "mn_MN" , # Mongolian (Cyrillic) - Mongolia
1753- 0x0c50 : "mn_MN" , # Mongolian (Traditional Mongolian) - Mongolia
1779+ 0x0c50 : "mn_MN@Mong " , # Mongolian (Traditional Mongolian) - Mongolia
17541780 0x0061 : "ne" , # Nepali
17551781 0x0461 : "ne_NP" , # Nepali - Nepal
17561782 0x0861 : "ne_IN" , # Nepali - India
@@ -1775,9 +1801,9 @@ def getpreferredencoding(do_setlocale=True):
17751801 0x0416 : "pt_BR" , # Portuguese - Brazil
17761802 0x0816 : "pt_PT" , # Portuguese - Portugal
17771803 0x0046 : "pa" , # Punjabi
1778- 0x7c46 : "pa" , # Punjabi
1804+ 0x7c46 : "pa@arabic " , # Punjabi
17791805 0x0446 : "pa_IN" , # Punjabi - India
1780- 0x0846 : "pa_PK" , # Punjabi - Islamic Republic of Pakistan
1806+ 0x0846 : "pa_PK@arabic " , # Punjabi - Islamic Republic of Pakistan
17811807 0x006b : "quz" , # Quechua
17821808 0x046b : "quz_BO" , # Quechua - Bolivia
17831809 0x086b : "quz_EC" , # Quechua - Ecuador
@@ -1810,25 +1836,25 @@ def getpreferredencoding(do_setlocale=True):
18101836 0x044f : "sa_IN" , # Sanskrit - India
18111837 0x0091 : "gd" , # Scottish Gaelic
18121838 0x0491 : "gd_GB" , # Scottish Gaelic - United Kingdom
1813- 0x6c1a : "sr" , # Serbian (Cyrillic)
1814- 0x701a : "sr" , # Serbian (Latin)
1839+ 0x6c1a : "sr@cyrillic " , # Serbian (Cyrillic)
1840+ 0x701a : "sr@latin " , # Serbian (Latin)
18151841 0x7c1a : "sr" , # Serbian (Latin)
1816- 0x081a : "sr_CS" , # Serbian (Latin) - Serbia and Montenegro (Former)
1817- 0x0c1a : "sr_CS" , # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1818- 0x181a : "sr_BA" , # Serbian (Latin) - Bosnia and Herzegovina
1819- 0x1c1a : "sr_BA" , # Serbian (Cyrillic) - Bosnia and Herzegovina
1820- 0x241a : "sr_RS" , # Serbian (Latin) - Serbia
1821- 0x281a : "sr_RS" , # Serbian (Cyrillic) - Serbia
1822- 0x2c1a : "sr_ME" , # Serbian (Latin) - Montenegro
1823- 0x301a : "sr_ME" , # Serbian (Cyrillic) - Montenegro
1842+ 0x081a : "sr_CS@latin " , # Serbian (Latin) - Serbia and Montenegro (Former)
1843+ 0x0c1a : "sr_CS@cyrillic " , # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1844+ 0x181a : "sr_BA@latin " , # Serbian (Latin) - Bosnia and Herzegovina
1845+ 0x1c1a : "sr_BA@cyrillic " , # Serbian (Cyrillic) - Bosnia and Herzegovina
1846+ 0x241a : "sr_RS@latin " , # Serbian (Latin) - Serbia
1847+ 0x281a : "sr_RS@cyrillic " , # Serbian (Cyrillic) - Serbia
1848+ 0x2c1a : "sr_ME@latin " , # Serbian (Latin) - Montenegro
1849+ 0x301a : "sr_ME@cyrillic " , # Serbian (Cyrillic) - Montenegro
18241850 0x006c : "nso" , # Sesotho sa Leboa
18251851 0x046c : "nso_ZA" , # Sesotho sa Leboa - South Africa
18261852 0x0032 : "tn" , # Setswana
18271853 0x0432 : "tn_ZA" , # Setswana - South Africa
18281854 0x0832 : "tn_BW" , # Setswana - Botswana
18291855 0x0059 : "sd" , # Sindhi
1830- 0x7c59 : "sd" , # Sindhi
1831- 0x0859 : "sd_PK" , # Sindhi - Islamic Republic of Pakistan
1856+ 0x7c59 : "sd@arabic " , # Sindhi
1857+ 0x0859 : "sd_PK@arabic " , # Sindhi - Islamic Republic of Pakistan
18321858 0x005b : "si" , # Sinhala
18331859 0x045b : "si_LK" , # Sinhala - Sri Lanka
18341860 0x001b : "sk" , # Slovak
@@ -1867,14 +1893,14 @@ def getpreferredencoding(do_setlocale=True):
18671893 0x005a : "syr" , # Syriac
18681894 0x045a : "syr_SY" , # Syriac - Syria
18691895 0x0028 : "tg" , # Tajik (Cyrillic)
1870- 0x7c28 : "tg" , # Tajik (Cyrillic)
1871- 0x0428 : "tg_TJ" , # Tajik (Cyrillic) - Tajikistan
1896+ 0x7c28 : "tg@cyrillic " , # Tajik (Cyrillic)
1897+ 0x0428 : "tg_TJ@cyrillic " , # Tajik (Cyrillic) - Tajikistan
18721898 0x005f : "tzm" , # Tamazight (Latin)
1873- 0x785f : "tzm" ,
1874- 0x7c5f : "tzm" , # Tamazight (Latin)
1875- 0x085f : "tzm_DZ" , # Tamazight (Latin) - Algeria
1876- 0x045f : "tzm_MA" , # Central Atlas Tamazight (Arabic) - Morocco
1877- 0x105f : "tzm_MA" ,
1899+ 0x785f : "tzm@Tfng " ,
1900+ 0x7c5f : "tzm@latin " , # Tamazight (Latin)
1901+ 0x085f : "tzm_DZ@latin " , # Tamazight (Latin) - Algeria
1902+ 0x045f : "tzm_MA@arabic " , # Central Atlas Tamazight (Arabic) - Morocco
1903+ 0x105f : "tzm_MA@Tfng " ,
18781904 0x0049 : "ta" , # Tamil
18791905 0x0449 : "ta_IN" , # Tamil - India
18801906 0x0849 : "ta_LK" , # Tamil - Sri Lanka
@@ -1905,9 +1931,9 @@ def getpreferredencoding(do_setlocale=True):
19051931 0x0080 : "ug" , # Uyghur
19061932 0x0480 : "ug_CN" , # Uyghur - People's Republic of China
19071933 0x0043 : "uz" , # Uzbek (Latin)
1908- 0x7843 : "uz" , # Uzbek (Cyrillic)
1909- 0x7c43 : "uz" , # Uzbek (Latin)
1910- 0x0443 : "uz_UZ" , # Uzbek (Latin) - Uzbekistan
1934+ 0x7843 : "uz@cyrillic " , # Uzbek (Cyrillic)
1935+ 0x7c43 : "uz@latin " , # Uzbek (Latin)
1936+ 0x0443 : "uz_UZ@latin " , # Uzbek (Latin) - Uzbekistan
19111937 0x0033 : "ve" , # Venda
19121938 0x0433 : "ve_ZA" , # Venda - South Africa
19131939 0x002a : "vi" , # Vietnamese
@@ -1943,6 +1969,16 @@ def getpreferredencoding(do_setlocale=True):
19431969 0x00051004 : "zh_SG" ,
19441970}
19451971
1972+ # Maps Unix-like modifiers to ISO15924 script names
1973+ # https://www.unicode.org/iso15924/iso15924.txt
1974+
1975+ _modifier_to_script = {
1976+ 'arabic' : 'Arab' ,
1977+ 'cyrillic' : 'Cyrl' ,
1978+ 'devanagari' : 'Deva' ,
1979+ 'latin' : 'Latn' ,
1980+ }
1981+
19461982def _print_locale ():
19471983
19481984 """ Test function.
0 commit comments