Skip to content

Commit

Permalink
Improve malaysian ITN implementation
Browse files Browse the repository at this point in the history
Note that TF prefix is not implemented because that might be a typo, since it
doesn't appear on the lists.

Fixes arthurdejong#113
Closes arthurdejong#237
  • Loading branch information
unho committed Jan 14, 2023
1 parent ddb2092 commit 7c9c296
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 30 deletions.
4 changes: 4 additions & 0 deletions stdnum/my/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# coding: utf-8
#
# Copyright (C) 2013 Arthur de Jong
# Copyright (C) 2023 Leandro Regueiro
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
Expand All @@ -19,3 +20,6 @@
# 02110-1301 USA

"""Collection of Malaysian numbers."""

# provide aliases
from stdnum.my import itn as vat # noqa: F401
111 changes: 81 additions & 30 deletions stdnum/my/itn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# itn.py - functions for handling ITN numbers
#
# Copyright (C) 2020 Sergi Almacellas Abellana
# Copyright (C) 2023 Leandro Regueiro
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
Expand All @@ -21,65 +22,115 @@
The number is assigned by The Inland Revenue Board of Malaysia (IRBM) and it
is required to report the income. This unique number is known as
"Nombor CukaiPendapatan" or Income Tax Number.
"Nombor Cukai Pendapatan" or Income Tax Number (ITN).
The number consists of 11 or 12 characters. It is made up of two parts, normally
separated by a space. The first part consists of 1 or 2 letters and represents
the type of the file number. The second part is always ten digits and represents
the tax number.
For individuals the ITN consists of the 2-letter Type of File Number (SG for
individual resident or OG for individual non-resident) followed by a space, and
ending with the Income Tax Number (maximum 11 digits).
>>> validate('C2584563202')
For Non-Individuals the ITN consists of the Type of File Number (1 or 2 letters)
followed by a space, and ending with the Income Tax Number (maximum 10 digits).
The Type of File Number for Non-Individuals can be one of the following:
* C: Company, Pte. Ltd. Company, Limited Company or Non-Resident Company.
* CS: Cooperative Society.
* D: Partnership.
* E: Employer.
* F: Association.
* FA: Non-Resident Public Entertainer.
* PT: Limited Liability Partnership.
* TA: Trust Body.
* TC: Unit Trust/ Property Trust.
* TN: Business Trust.
* TR: Real Estate Investment Trust/ Property Trust Fund.
* TP: Deceased Person's Estate.
* TJ: Hindu Joint Family.
* LE: Labuan Entity.
>>> validate('SG 10234567090')
'SG10234567090'
>>> validate('OG 25845632021')
'OG25845632021'
>>> validate('C 2584563202')
'C2584563202'
>>> validate('CDB2584563202') # Should contain the prefix
>>> validate('1')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> validate('12345678901234')
Traceback (most recent call last):
...
InvalidLength: ...
>>> validate('CD12346789012') # Should contain the prefix
InvalidComponent: ...
>>> validate('12345')
Traceback (most recent call last):
...
InvalidLength: ...
>>> validate('C258456320B') # number should only contain digits
InvalidComponent: ...
>>> validate('X 12345')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> validate('C 12345X')
Traceback (most recent call last):
...
InvalidFormat: ...
>>> format('C2584563202')
'C 2584563202'
>>> format('SG10234567090')
'SG 10234567090'
"""

from stdnum.exceptions import *
from stdnum.util import clean, isdigits


def compact(number):
    """Return the minimal representation of the number.

    Valid separators (spaces, dashes and asterisks) are removed, and any
    whitespace left around the result is stripped.
    """
    without_separators = clean(number, ' -*')
    return without_separators.strip()
# Type of File Number prefixes used by individuals; validate() allows at
# most 11 digits after these.
PREFIXES_11_DIGITS = (
    'SG',
    'OG',
)
# Type of File Number prefixes used by non-individuals; validate() allows at
# most 10 digits after these.
PREFIXES_10_DIGITS = (
    'C', 'CS', 'D', 'E', 'F', 'FA', 'PT',
    'TA', 'TC', 'TN', 'TR', 'TP', 'TJ', 'LE',
)
# Every prefix accepted by validate(), individuals first.
VALID_PREFIXES = PREFIXES_11_DIGITS + PREFIXES_10_DIGITS


def split(number):
    """Split the number into its prefix and its numerical part.

    The number is compacted first. The numerical part is taken to be the
    last 10 characters of the compact number, or the last 11 characters when
    the compact number is longer than 12 characters.

    Returns a (prefix, digits) tuple of strings.
    """
    number = compact(number)
    # A 13-character number carries 11 digits after its prefix. The slice
    # width must never exceed the length of the number, otherwise the prefix
    # comes back empty and the whole number lands in the numerical part.
    index = 11 if len(number) > 12 else 10
    return number[:-index], number[-index:]
def _get_prefix_and_number(number):
"""Return the number separated in prefix and numerical part.
This assumes the number has been previously compacted.
"""
for i, c in enumerate(number):
if c.isdigit():
return number[:i], number[i:]
return number, ''


def compact(number):
    """Return the minimal, upper-case representation of the number.

    Valid separators (spaces, dashes and asterisks) are removed, any
    whitespace left around the result is stripped and letters are converted
    to upper case.
    """
    without_separators = clean(number, ' -*')
    trimmed = without_separators.strip()
    return trimmed.upper()


def validate(number):
    """Check if the number is a valid ITN number.

    This checks the prefix (Type of File Number) and the length and
    formatting of the numerical part.

    Returns the compact representation of the number.

    Raises InvalidComponent if the prefix is not a known Type of File
    Number, if the numerical part is missing, or if the numerical part is
    longer than the prefix allows (11 characters for individuals, 10 for
    non-individuals). Raises InvalidFormat if the numerical part contains
    anything other than digits.
    """
    number = compact(number)
    prefix, digits = _get_prefix_and_number(number)
    if prefix not in VALID_PREFIXES:
        raise InvalidComponent()
    if not digits:
        raise InvalidComponent()
    # The maximum length of the numerical part depends on the kind of prefix.
    if prefix in PREFIXES_11_DIGITS and len(digits) > 11:
        raise InvalidComponent()
    if prefix in PREFIXES_10_DIGITS and len(digits) > 10:
        raise InvalidComponent()
    # Anything after the first digit must itself be a digit.
    if not isdigits(digits):
        raise InvalidFormat()
    return number


def is_valid(number):
"""Check if the number is a valid NRIC number."""
"""Check if the number is a valid ITN number."""
try:
return bool(validate(number))
except ValidationError:
Expand All @@ -88,4 +139,4 @@ def is_valid(number):

def format(number):
    """Reformat the number to the standard presentation format.

    The number is compacted, split at its first digit, and the prefix and
    numerical part are joined with a single space.
    """
    return ' '.join(_get_prefix_and_number(compact(number)))
82 changes: 82 additions & 0 deletions tests/test_my_itn.doctest
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
test_my_itn.doctest - more detailed doctests for stdnum.my.itn module

Copyright (C) 2023 Leandro Regueiro

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA


This file contains more detailed doctests for the stdnum.my.itn module. It
tries to test more corner cases and detailed functionality that is not really
useful as module documentation.

>>> from stdnum.my import itn


Tests for some corner cases.

>>> itn.validate('SG 10234567090')
'SG10234567090'
>>> itn.validate('OG 25845632021')
'OG25845632021'
>>> itn.validate('C 2584563202')
'C2584563202'
>>> itn.validate('X 12345')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> itn.validate('12345')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> itn.validate('C')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> itn.validate('SG 123456789012')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> itn.validate('C 12345678901')
Traceback (most recent call last):
...
InvalidComponent: ...
>>> itn.validate('C 12345X')
Traceback (most recent call last):
...
InvalidFormat: ...
>>> itn.format('C2584563202')
'C 2584563202'
>>> itn.format('SG10234567090')
'SG 10234567090'


These have been found online and should all be valid numbers.

>>> numbers = '''
...
... C 2128186207
... C 2354867110
... C 2493192407
... F 1064671704
... OG 04455987090
... SG 2178656-09
... SG 10234567090
... OG 25845632021
... C 2584563202
...
... '''
>>> [x for x in numbers.splitlines() if x and not itn.is_valid(x)]
[]

0 comments on commit 7c9c296

Please sign in to comment.