From 7c9c2969b75390c9fca21d3ee206706103ba7812 Mon Sep 17 00:00:00 2001
From: Leandro Regueiro
Date: Sat, 14 Jan 2023 19:25:58 +0100
Subject: [PATCH] Improve Malaysian ITN implementation

Note that the TF prefix is not implemented because it might be a typo,
since it doesn't appear on the lists.

Fixes #113
Closes #237
---
 stdnum/my/__init__.py     |   4 ++
 stdnum/my/itn.py          | 111 +++++++++++++++++++++++++++-----------
 tests/test_my_itn.doctest |  82 ++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 30 deletions(-)
 create mode 100644 tests/test_my_itn.doctest

diff --git a/stdnum/my/__init__.py b/stdnum/my/__init__.py
index e20908ed..949f3950 100644
--- a/stdnum/my/__init__.py
+++ b/stdnum/my/__init__.py
@@ -2,6 +2,7 @@
 # coding: utf-8
 #
 # Copyright (C) 2013 Arthur de Jong
+# Copyright (C) 2023 Leandro Regueiro
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -19,3 +20,6 @@
 # 02110-1301 USA
 
 """Collection of Malaysian numbers."""
+
+# provide aliases
+from stdnum.my import itn as vat  # noqa: F401
diff --git a/stdnum/my/itn.py b/stdnum/my/itn.py
index f2873260..f84284e5 100644
--- a/stdnum/my/itn.py
+++ b/stdnum/my/itn.py
@@ -1,6 +1,7 @@
 # itn.py - functions for handling ITN numbers
 #
 # Copyright (C) 2020 Sergi Almacellas Abellana
+# Copyright (C) 2023 Leandro Regueiro
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -21,65 +22,115 @@
 
 The number is assigned by The Inland Revenue Board of Malaysia (IRBM) and it
 is required to report the income. This unique number is known as
-"Nombor CukaiPendapatan" or Income Tax Number.
+"Nombor Cukai Pendapatan" or Income Tax Number (ITN).
 
-The number consist of 11 or 12 digits. It is structured by two types, normally
-separated by an space. The first one consists of 1 or 2 leters and represents
-the type of the file number. The second one is always ten digits an represents
-the tax number.
+For individuals the ITN consists of the 2-letter Type of File Number (SG for
+individual resident or OG for individual non-resident) followed by a space, and
+ending with the Income Tax Number (maximum 11 digits).
 
->>> validate('C2584563202')
+For Non-Individuals the ITN consists of the Type of File Number (1 or 2 letters)
+followed by a space, and ending with the Income Tax Number (maximum 10 digits).
+The Type of File Number for Non-Individuals can be one of the following:
+
+* C: Company, Pte. Ltd. Company, Limited Company or Non-Resident Company.
+* CS: Cooperative Society.
+* D: Partnership.
+* E: Employer.
+* F: Association.
+* FA: Non-Resident Public Entertainer.
+* PT: Limited Liability Partnership.
+* TA: Trust Body.
+* TC: Unit Trust/ Property Trust.
+* TN: Business Trust.
+* TR: Real Estate Investment Trust/ Property Trust Fund.
+* TP: Deceased Person's Estate.
+* TJ: Hindu Joint Family.
+* LE: Labuan Entity.
+
+>>> validate('SG 10234567090')
+'SG10234567090'
+>>> validate('OG 25845632021')
+'OG25845632021'
+>>> validate('C 2584563202')
 'C2584563202'
->>> validate('CDB2584563202')  # Should contain the prefix
+>>> validate('1')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> validate('12345678901234')
 Traceback (most recent call last):
     ...
-InvalidLength: ...
->>> validate('CD12346789012')  # Should contain the prefix
+InvalidComponent: ...
+>>> validate('12345')
 Traceback (most recent call last):
     ...
-InvalidLength: ...
->>> validate('C258456320B')  # number should only contain digits
+InvalidComponent: ...
+>>> validate('X 12345')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> validate('C 12345X')
 Traceback (most recent call last):
     ...
 InvalidFormat: ...
 >>> format('C2584563202')
 'C 2584563202'
+>>> format('SG10234567090')
+'SG 10234567090'
 """
 
 from stdnum.exceptions import *
 from stdnum.util import clean, isdigits
 
 
-def compact(number):
-    """Convert the number to the minimal representation. This strips the
-    number of any valid separators and removes surrounding whitespace."""
-    return clean(number, ' -*').strip()
+PREFIXES_11_DIGITS = ('SG', 'OG')
+PREFIXES_10_DIGITS = ('C', 'CS', 'D', 'E', 'F', 'FA', 'PT', 'TA', 'TC', 'TN',
+                      'TR', 'TP', 'TJ', 'LE')
+VALID_PREFIXES = PREFIXES_11_DIGITS + PREFIXES_10_DIGITS
 
 
-def split(number):
-    number = compact(number)
-    index = 10
-    if len(number) > 12:
-        index += 11
-    return number[:-index], number[-index:]
+def _get_prefix_and_number(number):
+    """Return the prefix (everything before the first digit) and the rest.
+
+    This assumes the number has been previously compacted.
+    """
+    for i, c in enumerate(number):
+        if c.isdigit():
+            return number[:i], number[i:]
+    return number, ''
+
+
+def compact(number):
+    """Convert the number to the minimal representation.
+
+    This strips the number of any valid separators and surrounding
+    whitespace, and converts it to upper case.
+    """
+    return clean(number, ' -*').strip().upper()
 
 
 def validate(number):
-    """Check if the number is a valid NRIC number. This checks the length,
-    formatting and birth date and place."""
+    """Check if the number is a valid ITN number.
+
+    This checks the prefix, length and formatting.
+    """
     number = compact(number)
-    if len(number) > 13 or len(number) <= 10:
-        raise InvalidLength()
-    prefix, digits = split(number)
-    if not prefix or len(prefix) > 2:
-        raise InvalidLength()
+    prefix, digits = _get_prefix_and_number(number)
+    if prefix not in VALID_PREFIXES:
+        raise InvalidComponent()
+    if not digits:
+        raise InvalidComponent()
+    if prefix in PREFIXES_11_DIGITS and len(digits) > 11:
+        raise InvalidComponent()
+    if prefix in PREFIXES_10_DIGITS and len(digits) > 10:
+        raise InvalidComponent()
     if not isdigits(digits):
         raise InvalidFormat()
     return number
 
 
 def is_valid(number):
-    """Check if the number is a valid NRIC number."""
+    """Check if the number is a valid ITN number."""
     try:
         return bool(validate(number))
     except ValidationError:
@@ -88,4 +139,4 @@ def is_valid(number):
 
 def format(number):
     """Reformat the number to the standard presentation format."""
-    return ' '.join(split(number))
+    return ' '.join(_get_prefix_and_number(compact(number)))
diff --git a/tests/test_my_itn.doctest b/tests/test_my_itn.doctest
new file mode 100644
index 00000000..0dae3a3e
--- /dev/null
+++ b/tests/test_my_itn.doctest
@@ -0,0 +1,82 @@
+test_my_itn.doctest - more detailed doctests for stdnum.my.itn module
+
+Copyright (C) 2023 Leandro Regueiro
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA
+
+
+This file contains more detailed doctests for the stdnum.my.itn module. It
+tries to test more corner cases and detailed functionality that is not really
+useful as module documentation.
+
+>>> from stdnum.my import itn
+
+
+Tests for some corner cases.
+
+>>> itn.validate('SG 10234567090')
+'SG10234567090'
+>>> itn.validate('OG 25845632021')
+'OG25845632021'
+>>> itn.validate('C 2584563202')
+'C2584563202'
+>>> itn.validate('X 12345')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> itn.validate('12345')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> itn.validate('C')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> itn.validate('SG 123456789012')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> itn.validate('C 12345678901')
+Traceback (most recent call last):
+    ...
+InvalidComponent: ...
+>>> itn.validate('C 12345X')
+Traceback (most recent call last):
+    ...
+InvalidFormat: ...
+>>> itn.format('C2584563202')
+'C 2584563202'
+>>> itn.format('SG10234567090')
+'SG 10234567090'
+
+
+These have been found online and should all be valid numbers.
+
+>>> numbers = '''
+...
+... C 2128186207
+... C 2354867110
+... C 2493192407
+... F 1064671704
+... OG 04455987090
+... SG 2178656-09
+... SG 10234567090
+... OG 25845632021
+... C 2584563202
+...
+... '''
+>>> [x for x in numbers.splitlines() if x and not itn.is_valid(x)]
+[]