Merge pull request #292 from akx/cldr28

Upgrade to CLDR 28
python-babel · Jan 1, 2016
commit 515d1c6 · 2 parents: 6af564f + 9f7f4d0
Show file tree

Hide file tree

Showing 10 changed files with 77 additions and 51 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,7 @@ dist
 test-env
 **/__pycache__
 babel/global.dat
+babel/global.dat.json
 tests/messages/data/project/i18n/long_messages.pot
 tests/messages/data/project/i18n/temp.pot
 tests/messages/data/project/i18n/en_US

diff --git a/babel/core.py b/babel/core.py
@@ -113,10 +113,10 @@ class Locale(object):
     If a locale is requested for which no locale data is available, an
     `UnknownLocaleError` is raised:
 
-    >>> Locale.parse('en_DE')
+    >>> Locale.parse('en_XX')
     Traceback (most recent call last):
         ...
-    UnknownLocaleError: unknown locale 'en_DE'
+    UnknownLocaleError: unknown locale 'en_XX'
 
     For more information see :rfc:`3066`.
     """
@@ -435,8 +435,8 @@ def get_script_name(self, locale=None):
     script_name = property(get_script_name, doc="""\
         The localized script name of the locale if available.
 
-        >>> Locale('ms', 'SG', script='Latn').script_name
-        u'Latin'
+        >>> Locale('sr', 'ME', script='Latn').script_name
+        u'latinica'
     """)
 
     @property

diff --git a/babel/dates.py b/babel/dates.py
@@ -582,7 +582,7 @@ def format_datetime(datetime=None, format='medium', tzinfo=None,
 
     >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'),
     ...                 locale='fr_FR')
-    u'dimanche 1 avril 2007 17:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale'
+    u'dimanche 1 avril 2007 \xe0 17:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale'
     >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
     ...                 tzinfo=get_timezone('US/Eastern'), locale='en')
     u'2007.04.01 AD at 11:30:00 EDT'
@@ -742,7 +742,7 @@ def format_timedelta(delta, granularity='second', threshold=.85,
     The format parameter controls how compact or wide the presentation is:
 
     >>> format_timedelta(timedelta(hours=3), format='short', locale='en')
-    u'3 hrs'
+    u'3 hr'
     >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en')
     u'3h'
 

diff --git a/babel/numbers.py b/babel/numbers.py
@@ -265,7 +265,7 @@ def format_currency(number, currency, format=None, locale=LC_NUMERIC,
     >>> format_currency(1099.98, 'USD', locale='en_US')
     u'$1,099.98'
     >>> format_currency(1099.98, 'USD', locale='es_CO')
-    u'US$1.099,98'
+    u'US$\\xa01.099,98'
     >>> format_currency(1099.98, 'EUR', locale='de_DE')
     u'1.099,98\\xa0\\u20ac'
 

diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
@@ -5,17 +5,16 @@
 import shutil
 import hashlib
 import zipfile
-import urllib
 import subprocess
 try:
     from urllib.request import urlretrieve
 except ImportError:
     from urllib import urlretrieve
 
 
-URL = 'http://unicode.org/Public/cldr/26/core.zip'
-FILENAME = 'core-26.zip'
-FILESUM = '46220170238b092685fd24221f895e3d'
+URL = 'http://unicode.org/Public/cldr/28/core.zip'
+FILENAME = 'core-28.zip'
+FILESUM = 'bc545b4c831e1987ea931b04094d7b9fc59ec3d8'
 BLKSIZE = 131072
 
 
@@ -53,7 +52,7 @@ def is_good_file(filename):
     if not os.path.isfile(filename):
         log('Local copy \'%s\' not found', filename)
         return False
-    h = hashlib.md5()
+    h = hashlib.sha1()
     with open(filename, 'rb') as f:
         while 1:
             blk = f.read(BLKSIZE)
@@ -71,8 +70,9 @@ def is_good_file(filename):
 def main():
     scripts_path = os.path.dirname(os.path.abspath(__file__))
     repo = os.path.dirname(scripts_path)
-    cldr_path = os.path.join(repo, 'cldr')
-    zip_path = os.path.join(cldr_path, FILENAME)
+    cldr_dl_path = os.path.join(repo, 'cldr')
+    cldr_path = os.path.join(repo, 'cldr', os.path.splitext(FILENAME)[0])
+    zip_path = os.path.join(cldr_dl_path, FILENAME)
     changed = False
 
     while not is_good_file(zip_path):

diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
@@ -122,12 +122,37 @@ def _extract_plural_rules(file_path):
     return rule_dict
 
 
+def debug_repr(obj):
+    if isinstance(obj, PluralRule):
+        return obj.abstract
+    return repr(obj)
+
+
+def write_datafile(path, data, dump_json=False):
+    with open(path, 'wb') as outfile:
+        pickle.dump(data, outfile, 2)
+    if dump_json:
+        import json
+        with open(path + '.json', 'w') as outfile:
+            json.dump(data, outfile, indent=4, default=debug_repr)
+
+
 def main():
     parser = OptionParser(usage='%prog path/to/cldr')
+    parser.add_option(
+        '-f', '--force', dest='force', action='store_true', default=False,
+        help='force import even if destination file seems up to date'
+    )
+    parser.add_option(
+        '-j', '--json', dest='dump_json', action='store_true', default=False,
+        help='also export debugging JSON dumps of locale data'
+    )
+
     options, args = parser.parse_args()
     if len(args) != 1:
         parser.error('incorrect number of arguments')
-
+    force = bool(options.force)
+    dump_json = bool(options.dump_json)
     srcdir = args[0]
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                            '..', 'babel')
@@ -145,7 +170,7 @@ def main():
     # Import global data from the supplemental files
     global_path = os.path.join(destdir, 'global.dat')
     global_data = {}
-    if need_conversion(global_path, global_data, sup_filename):
+    if force or need_conversion(global_path, global_data, sup_filename):
         territory_zones = global_data.setdefault('territory_zones', {})
         zone_aliases = global_data.setdefault('zone_aliases', {})
         zone_territories = global_data.setdefault('zone_territories', {})
@@ -251,11 +276,7 @@ def main():
             cur_crounding = int(fraction.attrib.get('cashRounding', cur_rounding))
             currency_fractions[cur_code] = (cur_digits, cur_rounding, cur_cdigits, cur_crounding)
 
-        outfile = open(global_path, 'wb')
-        try:
-            pickle.dump(global_data, outfile, 2)
-        finally:
-            outfile.close()
+        write_datafile(global_path, global_data, dump_json=dump_json)
 
     # build a territory containment mapping for inheritance
     regions = {}
@@ -290,7 +311,7 @@ def main():
         data_filename = os.path.join(destdir, 'locale-data', stem + '.dat')
 
         data = {}
-        if not need_conversion(data_filename, data, full_filename):
+        if not (force or need_conversion(data_filename, data, full_filename)):
             continue
 
         tree = parse(full_filename)
@@ -600,21 +621,7 @@ def main():
             scientific_formats[elem.attrib.get('type')] = \
                 numbers.parse_pattern(pattern)
 
-        currency_formats = data.setdefault('currency_formats', {})
-        for elem in tree.findall('.//currencyFormats/currencyFormatLength/currencyFormat'):
-            if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                    and elem.attrib.get('type') in currency_formats:
-                continue
-            for child in elem.getiterator():
-                if child.tag == 'alias':
-                    currency_formats[elem.attrib.get('type')] = Alias(
-                        _translate_alias(['currency_formats', elem.attrib['type']],
-                                         child.attrib['path'])
-                    )
-                elif child.tag == 'pattern':
-                    pattern = text_type(child.text)
-                    currency_formats[elem.attrib.get('type')] = \
-                        numbers.parse_pattern(pattern)
+        parse_currency_formats(data, tree)
 
         percent_formats = data.setdefault('percent_formats', {})
         for elem in tree.findall('.//percentFormats/percentFormatLength'):
@@ -667,11 +674,29 @@ def main():
                     date_fields[field_type].setdefault(rel_time_type, {})\
                         [pattern.attrib['count']] = text_type(pattern.text)
 
-        outfile = open(data_filename, 'wb')
-        try:
-            pickle.dump(data, outfile, 2)
-        finally:
-            outfile.close()
+        write_datafile(data_filename, data, dump_json=dump_json)
+
+
+def parse_currency_formats(data, tree):
+    currency_formats = data.setdefault('currency_formats', {})
+    for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
+        curr_length_type = length_elem.attrib.get('type')
+        for elem in length_elem.findall('currencyFormat'):
+            type = elem.attrib.get('type')
+            if curr_length_type:
+                # Handle `<currencyFormatLength type="short">`, etc.
+                type = '%s:%s' % (type, curr_length_type)
+            if ('draft' in elem.attrib or 'alt' in elem.attrib) and type in currency_formats:
+                continue
+            for child in elem.getiterator():
+                if child.tag == 'alias':
+                    currency_formats[type] = Alias(
+                            _translate_alias(['currency_formats', elem.attrib['type']],
+                                             child.attrib['path'])
+                    )
+                elif child.tag == 'pattern':
+                    pattern = text_type(child.text)
+                    currency_formats[type] = numbers.parse_pattern(pattern)
 
 
 if __name__ == '__main__':

diff --git a/tests/messages/test_plurals.py b/tests/messages/test_plurals.py
@@ -34,7 +34,7 @@ def test_get_plural_accpets_strings():
 
 
 def test_get_plural_falls_back_to_default():
-    assert plurals.get_plural('aa') == (2, '(n != 1)')
+    assert plurals.get_plural('ii') == (2, '(n != 1)')
 
 
 def test_get_plural():

diff --git a/tests/test_dates.py b/tests/test_dates.py
@@ -282,14 +282,14 @@ def test_zero_seconds(self):
         self.assertEqual('0 seconds', string)
         string = dates.format_timedelta(timedelta(seconds=0), locale='en',
                                         format='short')
-        self.assertEqual('0 secs', string)
+        self.assertEqual('0 sec', string)
         string = dates.format_timedelta(timedelta(seconds=0),
                                         granularity='hour', locale='en')
         self.assertEqual('0 hours', string)
         string = dates.format_timedelta(timedelta(seconds=0),
                                         granularity='hour', locale='en',
                                         format='short')
-        self.assertEqual('0 hrs', string)
+        self.assertEqual('0 hr', string)
 
     def test_small_value_with_granularity(self):
         string = dates.format_timedelta(timedelta(seconds=42),
@@ -465,7 +465,7 @@ def test_format_datetime():
 
     full = dates.format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
                                  locale='fr_FR')
-    assert full == (u'dimanche 1 avril 2007 17:30:00 heure '
+    assert full == (u'dimanche 1 avril 2007 à 17:30:00 heure '
                     u'd\u2019\xe9t\xe9 d\u2019Europe centrale')
     custom = dates.format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
                                    tzinfo=timezone('US/Eastern'), locale='en')

diff --git a/tests/test_numbers.py b/tests/test_numbers.py
@@ -230,7 +230,7 @@ def test_format_currency():
     assert (numbers.format_currency(1099.98, 'USD', locale='en_US')
             == u'$1,099.98')
     assert (numbers.format_currency(1099.98, 'USD', locale='es_CO')
-            == u'US$1.099,98')
+            == u'US$\xa01.099,98')
     assert (numbers.format_currency(1099.98, 'EUR', locale='de_DE')
             == u'1.099,98\xa0\u20ac')
     assert (numbers.format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00',

diff --git a/tests/test_plural.py b/tests/test_plural.py
@@ -133,10 +133,10 @@ def test_plural_within_rules():
 
 def test_locales_with_no_plural_rules_have_default():
     from babel import Locale
-    aa_plural = Locale.parse('aa').plural_form
-    assert aa_plural(1) == 'other'
-    assert aa_plural(2) == 'other'
-    assert aa_plural(15) == 'other'
+    pf = Locale.parse('ii').plural_form
+    assert pf(1) == 'other'
+    assert pf(2) == 'other'
+    assert pf(15) == 'other'
 
 
 WELL_FORMED_TOKEN_TESTS = (