Skip to content

Commit d4a5f1d

Browse files
gh-148792: Add support for locales with @-modifiers on Windows
locale.setlocale() now supports Unix-like locale names with @-modifiers on Windows. For example: "ca_ES@valencia", "sr_RS@latin", "uz_UZ@cyrillic" and "ks_IN@devanagari".
1 parent bfe6f9f commit d4a5f1d

File tree

3 files changed

+93
-54
lines changed

3 files changed

+93
-54
lines changed

Doc/whatsnew/3.15.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -895,7 +895,7 @@ locale
895895
* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
896896
``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
897897
but included in the language code.
898-
(Contributed by Serhiy Storchaka in :gh:`137729`.)
898+
(Contributed by Serhiy Storchaka in :gh:`137729` and :gh:`148792`.)
899899

900900
* Undeprecate the :func:`locale.getdefaultlocale` function.
901901
(Contributed by Victor Stinner in :gh:`130796`.)

Lib/locale.py

Lines changed: 89 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,30 @@ def normalize(localename):
468468

469469
return localename
470470

471+
def _conv_to_windows(locale):
    """Translate a Unix-like locale name into a hyphenated form.

    Every underscore in the name is first replaced with a hyphen.  A name
    without an '@' is returned right after that step.  Otherwise the name
    is taken apart as language[-territory][.encoding]@modifier and put
    back together as language[-Script][-territory][-valencia][.encoding]:

    * the modifier is lowercased before it is examined;
    * 'valencia' is kept as a trailing variant, after the territory;
    * any other non-empty modifier is treated as a script and inserted
      between language and territory -- looked up in _modifier_to_script
      when listed there, title-cased otherwise;
    * an empty modifier (a trailing '@') is simply dropped.

    For example 'sr_RS@latin' becomes 'sr-Latn-RS' and
    'ca_ES.UTF-8@valencia' becomes 'ca-ES-valencia.UTF-8'.
    """
    name = locale.replace('_', '-')
    head, at_sign, modifier = name.partition('@')
    if not at_sign:
        # No modifier part: the hyphenated name is already the result.
        return name

    head, _, encoding = head.partition('.')
    language, _, territory = head.partition('-')
    modifier = modifier.lower()

    pieces = [language]
    is_variant = modifier == 'valencia'
    if modifier and not is_variant:
        # A script subtag sits directly after the language.
        script = (_modifier_to_script[modifier]
                  if modifier in _modifier_to_script
                  else modifier.title())
        pieces.append(script)
    if territory:
        pieces.append(territory)
    if is_variant:
        # The variant goes last, after the territory.
        pieces.append(modifier)

    result = '-'.join(pieces)
    if encoding:
        result += '.' + encoding
    return result
494+
471495
def _parse_localename(localename):
472496

473497
""" Parses the locale code for localename and returns the
@@ -621,6 +645,8 @@ def setlocale(category, locale=None):
621645
if locale and not isinstance(locale, _builtin_str):
622646
# convert to string
623647
locale = normalize(_build_localename(locale))
648+
if os.name == 'nt':
649+
locale = _conv_to_windows(locale)
624650
return _setlocale(category, locale)
625651

626652

@@ -1546,9 +1572,9 @@ def getpreferredencoding(do_setlocale=True):
15461572
0x004d: "as", # Assamese
15471573
0x044d: "as_IN", # Assamese - India
15481574
0x002c: "az", # Azerbaijani (Latin)
1549-
0x742c: "az", # Azerbaijani (Cyrillic)
1550-
0x782c: "az", # Azerbaijani (Latin)
1551-
0x042c: "az_AZ", # Azerbaijani (Latin) - Azerbaijan
1575+
0x742c: "az@cyrillic", # Azerbaijani (Cyrillic)
1576+
0x782c: "az@latin", # Azerbaijani (Latin)
1577+
0x042c: "az_AZ@latin", # Azerbaijani (Latin) - Azerbaijan
15521578
0x0045: "bn", # Bangla
15531579
0x0445: "bn_IN", # Bangla - India
15541580
0x0845: "bn_BD", # Bangla - Bangladesh
@@ -1558,10 +1584,10 @@ def getpreferredencoding(do_setlocale=True):
15581584
0x042d: "eu_ES", # Basque - Spain
15591585
0x0023: "be", # Belarusian
15601586
0x0423: "be_BY", # Belarusian - Belarus
1561-
0x641a: "bs", # Bosnian (Cyrillic)
1562-
0x681a: "bs", # Bosnian (Latin)
1563-
0x141a: "bs_BA", # Bosnian (Latin) - Bosnia and Herzegovina
1564-
0x201a: "bs_BA", # Bosnian (Cyrillic) - Bosnia and Herzegovina
1587+
0x641a: "bs@cyrillic", # Bosnian (Cyrillic)
1588+
0x681a: "bs@latin", # Bosnian (Latin)
1589+
0x141a: "bs_BA@latin", # Bosnian (Latin) - Bosnia and Herzegovina
1590+
0x201a: "bs_BA@cyrillic", # Bosnian (Cyrillic) - Bosnia and Herzegovina
15651591
0x781a: "bs", # Bosnian (Latin)
15661592
0x007e: "br", # Breton
15671593
0x047e: "br_FR", # Breton - France
@@ -1571,16 +1597,16 @@ def getpreferredencoding(do_setlocale=True):
15711597
0x0455: "my_MM", # Burmese - Myanmar
15721598
0x0003: "ca", # Catalan
15731599
0x0403: "ca_ES", # Catalan - Spain
1574-
0x0803: "ca_ES", # Valencian - Spain
1600+
0x0803: "ca_ES@valencia", # Valencian - Spain
15751601
0x0092: "ku", # Central Kurdish
1576-
0x7c92: "ku", # Central Kurdish
1577-
0x0492: "ku_IQ", # Central Kurdish - Iraq
1602+
0x7c92: "ku@arabic", # Central Kurdish
1603+
0x0492: "ku_IQ@arabic", # Central Kurdish - Iraq
15781604
0x005c: "chr", # Cherokee
1579-
0x7c5c: "chr", # Cherokee
1580-
0x045c: "chr_US", # Cherokee - United States
1581-
0x0004: "zh", # Chinese (Simplified)
1605+
0x7c5c: "chr@Cher", # Cherokee
1606+
0x045c: "chr_US@Cher", # Cherokee - United States
1607+
0x0004: "zh@Hans", # Chinese (Simplified)
15821608
0x7804: "zh", # Chinese (Simplified)
1583-
0x7c04: "zh", # Chinese (Traditional)
1609+
0x7c04: "zh@Hant", # Chinese (Traditional)
15841610
0x0404: "zh_TW", # Chinese (Traditional) - Taiwan
15851611
0x0804: "zh_CN", # Chinese (Simplified) - People's Republic of China
15861612
0x0c04: "zh_HK", # Chinese (Traditional) - Hong Kong S.A.R.
@@ -1648,9 +1674,9 @@ def getpreferredencoding(do_setlocale=True):
16481674
0x0062: "fy", # Frisian
16491675
0x0462: "fy_NL", # Frisian - Netherlands
16501676
0x0067: "ff", # Fulah
1651-
0x7c67: "ff", # Fulah (Latin)
1652-
0x0467: "ff_NG",
1653-
0x0867: "ff_SN", # Fulah - Senegal
1677+
0x7c67: "ff@latin", # Fulah (Latin)
1678+
0x0467: "ff_NG@latin",
1679+
0x0867: "ff_SN@latin", # Fulah - Senegal
16541680
0x0056: "gl", # Galician
16551681
0x0456: "gl_ES", # Galician - Spain
16561682
0x0037: "ka", # Georgian
@@ -1670,8 +1696,8 @@ def getpreferredencoding(do_setlocale=True):
16701696
0x0047: "gu", # Gujarati
16711697
0x0447: "gu_IN", # Gujarati - India
16721698
0x0068: "ha", # Hausa (Latin)
1673-
0x7c68: "ha", # Hausa (Latin)
1674-
0x0468: "ha_NG", # Hausa (Latin) - Nigeria
1699+
0x7c68: "ha@latin", # Hausa (Latin)
1700+
0x0468: "ha_NG@latin", # Hausa (Latin) - Nigeria
16751701
0x0075: "haw", # Hawaiian
16761702
0x0475: "haw_US", # Hawaiian - United States
16771703
0x000d: "he", # Hebrew
@@ -1687,10 +1713,10 @@ def getpreferredencoding(do_setlocale=True):
16871713
0x0021: "id", # Indonesian
16881714
0x0421: "id_ID", # Indonesian - Indonesia
16891715
0x005d: "iu", # Inuktitut (Latin)
1690-
0x785d: "iu", # Inuktitut (Syllabics)
1691-
0x7c5d: "iu", # Inuktitut (Latin)
1692-
0x045d: "iu_CA", # Inuktitut (Syllabics) - Canada
1693-
0x085d: "iu_CA", # Inuktitut (Latin) - Canada
1716+
0x785d: "iu@Cans", # Inuktitut (Syllabics)
1717+
0x7c5d: "iu@latin", # Inuktitut (Latin)
1718+
0x045d: "iu_CA@Cans", # Inuktitut (Syllabics) - Canada
1719+
0x085d: "iu_CA@latin", # Inuktitut (Latin) - Canada
16941720
0x003c: "ga", # Irish
16951721
0x083c: "ga_IE", # Irish - Ireland
16961722
0x0010: "it", # Italian
@@ -1700,10 +1726,10 @@ def getpreferredencoding(do_setlocale=True):
17001726
0x0411: "ja_JP", # Japanese - Japan
17011727
0x004b: "kn", # Kannada
17021728
0x044b: "kn_IN", # Kannada - India
1703-
0x0471: "kr_NG", # Kanuri (Latin) - Nigeria
1729+
0x0471: "kr_NG@latin", # Kanuri (Latin) - Nigeria
17041730
0x0060: "ks", # Kashmiri
1705-
0x0460: "ks", # Kashmiri - Perso_Arabic
1706-
0x0860: "ks_IN", # Kashmiri (Devanagari) - India
1731+
0x0460: "ks@arabic", # Kashmiri - Perso_Arabic
1732+
0x0860: "ks_IN@devanagari", # Kashmiri (Devanagari) - India
17071733
0x003f: "kk", # Kazakh
17081734
0x043f: "kk_KZ", # Kazakh - Kazakhstan
17091735
0x0053: "km", # Khmer
@@ -1747,10 +1773,10 @@ def getpreferredencoding(do_setlocale=True):
17471773
0x007c: "moh", # Mohawk
17481774
0x047c: "moh_CA", # Mohawk - Canada
17491775
0x0050: "mn", # Mongolian (Cyrillic)
1750-
0x7850: "mn", # Mongolian (Cyrillic)
1751-
0x7c50: "mn", # Mongolian (Traditional Mongolian)
1776+
0x7850: "mn@cyrillic", # Mongolian (Cyrillic)
1777+
0x7c50: "mn@Mong", # Mongolian (Traditional Mongolian)
17521778
0x0450: "mn_MN", # Mongolian (Cyrillic) - Mongolia
1753-
0x0c50: "mn_MN", # Mongolian (Traditional Mongolian) - Mongolia
1779+
0x0c50: "mn_MN@Mong", # Mongolian (Traditional Mongolian) - Mongolia
17541780
0x0061: "ne", # Nepali
17551781
0x0461: "ne_NP", # Nepali - Nepal
17561782
0x0861: "ne_IN", # Nepali - India
@@ -1775,9 +1801,9 @@ def getpreferredencoding(do_setlocale=True):
17751801
0x0416: "pt_BR", # Portuguese - Brazil
17761802
0x0816: "pt_PT", # Portuguese - Portugal
17771803
0x0046: "pa", # Punjabi
1778-
0x7c46: "pa", # Punjabi
1804+
0x7c46: "pa@arabic", # Punjabi
17791805
0x0446: "pa_IN", # Punjabi - India
1780-
0x0846: "pa_PK", # Punjabi - Islamic Republic of Pakistan
1806+
0x0846: "pa_PK@arabic", # Punjabi - Islamic Republic of Pakistan
17811807
0x006b: "quz", # Quechua
17821808
0x046b: "quz_BO", # Quechua - Bolivia
17831809
0x086b: "quz_EC", # Quechua - Ecuador
@@ -1810,25 +1836,25 @@ def getpreferredencoding(do_setlocale=True):
18101836
0x044f: "sa_IN", # Sanskrit - India
18111837
0x0091: "gd", # Scottish Gaelic
18121838
0x0491: "gd_GB", # Scottish Gaelic - United Kingdom
1813-
0x6c1a: "sr", # Serbian (Cyrillic)
1814-
0x701a: "sr", # Serbian (Latin)
1839+
0x6c1a: "sr@cyrillic", # Serbian (Cyrillic)
1840+
0x701a: "sr@latin", # Serbian (Latin)
18151841
0x7c1a: "sr", # Serbian (Latin)
1816-
0x081a: "sr_CS", # Serbian (Latin) - Serbia and Montenegro (Former)
1817-
0x0c1a: "sr_CS", # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1818-
0x181a: "sr_BA", # Serbian (Latin) - Bosnia and Herzegovina
1819-
0x1c1a: "sr_BA", # Serbian (Cyrillic) - Bosnia and Herzegovina
1820-
0x241a: "sr_RS", # Serbian (Latin) - Serbia
1821-
0x281a: "sr_RS", # Serbian (Cyrillic) - Serbia
1822-
0x2c1a: "sr_ME", # Serbian (Latin) - Montenegro
1823-
0x301a: "sr_ME", # Serbian (Cyrillic) - Montenegro
1842+
0x081a: "sr_CS@latin", # Serbian (Latin) - Serbia and Montenegro (Former)
1843+
0x0c1a: "sr_CS@cyrillic", # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1844+
0x181a: "sr_BA@latin", # Serbian (Latin) - Bosnia and Herzegovina
1845+
0x1c1a: "sr_BA@cyrillic", # Serbian (Cyrillic) - Bosnia and Herzegovina
1846+
0x241a: "sr_RS@latin", # Serbian (Latin) - Serbia
1847+
0x281a: "sr_RS@cyrillic", # Serbian (Cyrillic) - Serbia
1848+
0x2c1a: "sr_ME@latin", # Serbian (Latin) - Montenegro
1849+
0x301a: "sr_ME@cyrillic", # Serbian (Cyrillic) - Montenegro
18241850
0x006c: "nso", # Sesotho sa Leboa
18251851
0x046c: "nso_ZA", # Sesotho sa Leboa - South Africa
18261852
0x0032: "tn", # Setswana
18271853
0x0432: "tn_ZA", # Setswana - South Africa
18281854
0x0832: "tn_BW", # Setswana - Botswana
18291855
0x0059: "sd", # Sindhi
1830-
0x7c59: "sd", # Sindhi
1831-
0x0859: "sd_PK", # Sindhi - Islamic Republic of Pakistan
1856+
0x7c59: "sd@arabic", # Sindhi
1857+
0x0859: "sd_PK@arabic", # Sindhi - Islamic Republic of Pakistan
18321858
0x005b: "si", # Sinhala
18331859
0x045b: "si_LK", # Sinhala - Sri Lanka
18341860
0x001b: "sk", # Slovak
@@ -1867,14 +1893,14 @@ def getpreferredencoding(do_setlocale=True):
18671893
0x005a: "syr", # Syriac
18681894
0x045a: "syr_SY", # Syriac - Syria
18691895
0x0028: "tg", # Tajik (Cyrillic)
1870-
0x7c28: "tg", # Tajik (Cyrillic)
1871-
0x0428: "tg_TJ", # Tajik (Cyrillic) - Tajikistan
1896+
0x7c28: "tg@cyrillic", # Tajik (Cyrillic)
1897+
0x0428: "tg_TJ@cyrillic", # Tajik (Cyrillic) - Tajikistan
18721898
0x005f: "tzm", # Tamazight (Latin)
1873-
0x785f: "tzm",
1874-
0x7c5f: "tzm", # Tamazight (Latin)
1875-
0x085f: "tzm_DZ", # Tamazight (Latin) - Algeria
1876-
0x045f: "tzm_MA", # Central Atlas Tamazight (Arabic) - Morocco
1877-
0x105f: "tzm_MA",
1899+
0x785f: "tzm@Tfng",
1900+
0x7c5f: "tzm@latin", # Tamazight (Latin)
1901+
0x085f: "tzm_DZ@latin", # Tamazight (Latin) - Algeria
1902+
0x045f: "tzm_MA@arabic", # Central Atlas Tamazight (Arabic) - Morocco
1903+
0x105f: "tzm_MA@Tfng",
18781904
0x0049: "ta", # Tamil
18791905
0x0449: "ta_IN", # Tamil - India
18801906
0x0849: "ta_LK", # Tamil - Sri Lanka
@@ -1905,9 +1931,9 @@ def getpreferredencoding(do_setlocale=True):
19051931
0x0080: "ug", # Uyghur
19061932
0x0480: "ug_CN", # Uyghur - People's Republic of China
19071933
0x0043: "uz", # Uzbek (Latin)
1908-
0x7843: "uz", # Uzbek (Cyrillic)
1909-
0x7c43: "uz", # Uzbek (Latin)
1910-
0x0443: "uz_UZ", # Uzbek (Latin) - Uzbekistan
1934+
0x7843: "uz@cyrillic", # Uzbek (Cyrillic)
1935+
0x7c43: "uz@latin", # Uzbek (Latin)
1936+
0x0443: "uz_UZ@latin", # Uzbek (Latin) - Uzbekistan
19111937
0x0033: "ve", # Venda
19121938
0x0433: "ve_ZA", # Venda - South Africa
19131939
0x002a: "vi", # Vietnamese
@@ -1943,6 +1969,16 @@ def getpreferredencoding(do_setlocale=True):
19431969
0x00051004: "zh_SG",
19441970
}
19451971

1972+
# Maps Unix-like locale modifiers to ISO 15924 four-letter script codes
1973+
# https://www.unicode.org/iso15924/iso15924.txt
1974+
1975+
_modifier_to_script = {
1976+
'arabic': 'Arab',
1977+
'cyrillic': 'Cyrl',
1978+
'devanagari': 'Deva',
1979+
'latin': 'Latn',
1980+
}
1981+
19461982
def _print_locale():
19471983

19481984
""" Test function.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`locale.setlocale` now supports Unix-like locale names with
2+
@-modifiers on Windows. For example: "ca_ES@valencia", "sr_RS@latin",
3+
"uz_UZ@cyrillic" and "ks_IN@devanagari".

0 commit comments

Comments
 (0)