Skip to content

Commit

Permalink
Handle 'localeRules="nonlikelyScript"' for parent locales
Browse files Browse the repository at this point in the history
Locales of the form 'lang_Script' where 'Script' is not the
likely script for 'lang' should have 'root' as their parent
locale. For example, the parent of 'az_Arab' should not be
computed as 'az' by truncating from the end, but should be
'root' instead as 'Arab' is not the likely script for 'az'.

The list of such locales was previously specified using
an explicit 'locales' attribute. It is now handled dynamically
using the new 'localeRules' attribute.
  • Loading branch information
tomasr8 authored and akx committed Jul 11, 2024
1 parent 956a4f3 commit 7d84b67
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 2 deletions.
28 changes: 26 additions & 2 deletions babel/localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ def locale_identifiers() -> list[str]:
]


def _is_non_likely_script(name: str) -> bool:
    """Return whether the locale is of the form ``lang_Script``,
    and the script is not the likely script for the language.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localeRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier to inspect, e.g. ``"az_Arab"``
    :return: ``True`` only when *name* consists of exactly a language and a
             script, and that script differs from the script found in the
             ``likely_subtags`` global data for the language; ``False`` for
             any other shape of identifier, for identifiers that
             ``parse_locale`` rejects with ``ValueError``, and when the
             language has no ``likely_subtags`` entry
    """
    # Function-scope import, mirroring how ``load`` imports ``get_global``
    # (presumably to avoid a circular import with ``babel.core``).
    from babel.core import get_global, parse_locale

    try:
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        return False

    # The rule only concerns bare ``lang_Script`` identifiers; anything that
    # carries a territory, variant or further components is out of scope.
    if not (lang and script) or territory or variant or rest:
        return False

    likely_subtag = get_global('likely_subtags').get(lang)
    if not likely_subtag:
        # Without a likely-subtags entry the likely script is unknown.
        # Handing ``None`` to ``parse_locale`` would fail with an uncaught
        # exception, so report "not a non-likely script" and let the caller
        # use its regular truncation-based parent lookup instead.
        return False

    _, _, likely_script, *_ = parse_locale(likely_subtag)
    return script != likely_script


def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
"""Load the locale data for the given locale.
Expand Down Expand Up @@ -132,8 +153,11 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str
from babel.core import get_global
parent = get_global('parent_exceptions').get(name)
if not parent:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
if _is_non_likely_script(name):
parent = 'root'
else:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
data = load(parent).copy()
filename = resolve_locale_filename(name)
with open(filename, 'rb') as fileobj:
Expand Down
5 changes: 5 additions & 0 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,11 @@ def parse_global(srcdir, sup):

for paternity in parentBlock.findall('./parentLocale'):
parent = paternity.attrib['parent']
if parent == 'root':
# Since CLDR-45, the 'root' parent locale uses 'localeRules="nonlikelyScript"' instead of
# 'locales'. This special case is handled in babel when loading locale data
# (https://cldr.unicode.org/index/downloads/cldr-45#h.5rbkhkncdqi9)
continue
for child in paternity.attrib['locales'].split():
parent_exceptions[child] = parent

Expand Down
15 changes: 15 additions & 0 deletions tests/test_localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ def test_load():
assert localedata.load('en_US') is localedata.load('en_US')


def test_load_inheritance(monkeypatch):
    """Check which locales end up in the cache when a locale is loaded."""
    from babel.localedata import _cache

    expected_chains = (
        # 'hi_Latn' has the 'lang_Script' shape targeted by the
        # 'nonlikelyScript' rule, but it has an explicit parent locale
        # 'en_IN', so the result must not be ['root', 'hi_Latn'].
        ('hi_Latn', ['root', 'en', 'en_001', 'en_IN', 'hi_Latn']),
        # 'Arab' is not a likely script for 'az', so 'az' must not be loaded.
        ('az_Arab', ['root', 'az_Arab']),
    )

    for locale_name, loaded_in_order in expected_chains:
        # Start from an empty cache so only this load's chain is recorded.
        _cache.clear()
        localedata.load(locale_name)
        assert list(_cache.keys()) == loaded_in_order


def test_merge():
d = {1: 'foo', 3: 'baz'}
localedata.merge(d, {1: 'Foo', 2: 'Bar'})
Expand Down

0 comments on commit 7d84b67

Please sign in to comment.