Skip to content

Commit

Permalink
Merge pull request #231 from remusao/update
Browse files Browse the repository at this point in the history
feature: add new 'getDomainWithoutSuffix(...)' method
  • Loading branch information
remusao authored Aug 29, 2019
2 parents 5afca15 + 9a8f2e8 commit 4e1d859
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 12 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

*not released*

- Update public suffix ruleset
- Add new `getDomainWithoutSuffix(...)` method

### 5.3.2

*2019-07-26*
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Using the command-line interface:
$ npx tldts 'http://www.writethedocs.org/conf/eu/2017/'
{
"domain": "writethedocs.org",
"domainWithoutSuffix": "writethedocs",
"hostname": "www.writethedocs.org",
"isIcann": true,
"isIp": false,
Expand All @@ -47,6 +48,7 @@ const { parse } = require('tldts');
// Retrieving hostname-related information for a given URL
parse('http://www.writethedocs.org/conf/eu/2017/');
// { domain: 'writethedocs.org',
// domainWithoutSuffix: 'writethedocs',
// hostname: 'www.writethedocs.org',
// isIcann: true,
// isIp: false,
Expand Down
12 changes: 12 additions & 0 deletions packages/tldts-core/src/domain-without-suffix.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

/**
 * Strip the public suffix, and the dot separating it from the rest, off the
 * end of a domain.
 *
 * Example: `getDomainWithoutSuffix('foo.co.uk', 'co.uk')` yields `'foo'`.
 *
 * @param domain - Registrable domain (e.g. `'foo.co.uk'`).
 * @param suffix - Public suffix of `domain` (e.g. `'co.uk'`).
 * @returns The leading part of `domain` that precedes `'.' + suffix`.
 */
export default function getDomainWithoutSuffix(domain: string, suffix: string): string {
  // Precondition: `suffix` is strictly shorter than `domain`. When both would
  // have the same length, `domain` is set to `null` upstream and this function
  // is never reached, so something always remains once the suffix is removed.
  const trailingLength = suffix.length + 1; // suffix plus the '.' before it
  return domain.substring(0, domain.length - trailingLength);
}
12 changes: 12 additions & 0 deletions packages/tldts-core/src/factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

import getDomain from './domain';
import getDomainWithoutSuffix from './domain-without-suffix';
import extractHostname from './extract-hostname';
import isIp from './is-ip';
import isValidHostname from './is-valid';
Expand All @@ -26,6 +27,7 @@ export interface IResult {
subdomain: string | null;
domain: string | null;
publicSuffix: string | null;
domainWithoutSuffix: string | null;

// Specifies if `publicSuffix` comes from the ICANN or PRIVATE section of the list
isIcann: boolean | null;
Expand Down Expand Up @@ -58,6 +60,7 @@ export function parseImpl(
const options: IOptions = setDefaults(partialOptions);
const result: IResult = {
domain: null,
domainWithoutSuffix: null,
hostname: null,
isIcann: null,
isIp: null,
Expand Down Expand Up @@ -128,6 +131,15 @@ export function parseImpl(

// Extract subdomain
result.subdomain = getSubdomain(result.hostname, result.domain);
if (step === FLAG.SUB_DOMAIN) {
return result;
}

// Extract domain without suffix
result.domainWithoutSuffix = getDomainWithoutSuffix(
result.domain,
result.publicSuffix,
);

return result;
}
7 changes: 7 additions & 0 deletions packages/tldts-experimental/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,10 @@ export function getHostname(
): string | null {
return parseImpl(url, FLAG.HOSTNAME, suffixLookup, options).hostname;
}

/**
 * Return the domain of `url` with its public suffix removed, or `null` when
 * the parser produced no such value.
 */
export function getDomainWithoutSuffix(
  url: string,
  options?: Partial<IOptions>,
): string | null {
  // Request every parsing step and keep only the field of interest.
  const { domainWithoutSuffix } = parseImpl(
    url,
    FLAG.ALL,
    suffixLookup,
    options,
  );
  return domainWithoutSuffix;
}
2 changes: 1 addition & 1 deletion packages/tldts-experimental/src/data/hashes.ts

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions packages/tldts-tests/src/tldts-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,20 @@ export default function test(tldts: any): void {
});
});

describe('getDomainWithoutSuffix method', () => {
  // No domain extracted: there is nothing to strip a suffix from.
  it('should return null if the domain cannot be found', () => {
    const result = tldts.getDomainWithoutSuffix('not-a-validHost');
    expect(result).toEqual(null);
  });

  // The whole input is itself a public suffix.
  it('should return null if domain and suffix are the same', () => {
    const result = tldts.getDomainWithoutSuffix('co.uk');
    expect(result).toEqual(null);
  });

  // Scheme, subdomain and multi-label suffix are all dropped.
  it('should return domain without suffix if domain exists', () => {
    const result = tldts.getDomainWithoutSuffix('https://sub.foo.co.uk');
    expect(result).toEqual('foo');
  });
});

describe('getSubdomain method', () => {
it('should return null if the domain cannot be found', () => {
expect(tldts.getSubdomain('not-a-validHost')).toEqual(null);
Expand Down Expand Up @@ -637,6 +651,7 @@ export default function test(tldts: any): void {
const mockResponse = (hostname: string) => {
return {
domain: null,
domainWithoutSuffix: null,
hostname,
isIcann: null,
isIp: true,
Expand All @@ -649,6 +664,7 @@ export default function test(tldts: any): void {
it('fallback to wildcard', () => {
expect(tldts.parse('https://foo.bar.badasdasdada')).toEqual({
domain: 'bar.badasdasdada',
domainWithoutSuffix: 'bar',
hostname: 'foo.bar.badasdasdada',
isIcann: false,
isIp: false,
Expand Down Expand Up @@ -683,6 +699,7 @@ export default function test(tldts: any): void {
it('disable ip detection', () => {
expect(tldts.parse('http://192.168.0.1/', { detectIp: false })).toEqual({
domain: '0.1',
domainWithoutSuffix: '0',
hostname: '192.168.0.1',
isIcann: false,
isIp: null,
Expand Down
44 changes: 35 additions & 9 deletions packages/tldts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Using the command-line interface:
$ npx tldts 'http://www.writethedocs.org/conf/eu/2017/'
{
"domain": "writethedocs.org",
"domainWithoutSuffix": "writethedocs",
"hostname": "www.writethedocs.org",
"isIcann": true,
"isIp": false,
Expand All @@ -42,6 +43,7 @@ const { parse } = require('tldts');
// Retrieving hostname-related information for a given URL
parse('http://www.writethedocs.org/conf/eu/2017/');
// { domain: 'writethedocs.org',
// domainWithoutSuffix: 'writethedocs',
// hostname: 'www.writethedocs.org',
// isIcann: true,
// isIp: false,
Expand All @@ -65,6 +67,7 @@ Alternatively, you can try it *directly in your browser* here: https://npm.runki
* `tldts.getDomain(url | hostname, options)`
* `tldts.getPublicSuffix(url | hostname, options)`
* `tldts.getSubdomain(url | hostname, options)`
* `tldts.getDomainWithoutSuffix(url | hostname, options)`

The behavior of `tldts` can be customized using an `options` argument for all
the functions exposed as part of the public API. This is useful to both change
Expand Down Expand Up @@ -103,6 +106,7 @@ const tldts = require('tldts');

tldts.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv');
// { domain: 'amazonaws.com',
// domainWithoutSuffix: 'amazonaws',
// hostname: 'spark-public.s3.amazonaws.com',
// isIcann: true,
// isIp: false,
Expand All @@ -112,6 +116,7 @@ tldts.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv');

tldts.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv', { allowPrivateDomains: true })
// { domain: 'spark-public.s3.amazonaws.com',
// domainWithoutSuffix: 'spark-public',
// hostname: 'spark-public.s3.amazonaws.com',
// isIcann: false,
// isIp: false,
Expand All @@ -121,6 +126,7 @@ tldts.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv',

tldts.parse('gopher://domain.unknown/');
// { domain: 'domain.unknown',
// domainWithoutSuffix: 'domain',
// hostname: 'domain.unknown',
// isIcann: false,
// isIp: false,
Expand All @@ -130,6 +136,7 @@ tldts.parse('gopher://domain.unknown/');

tldts.parse('https://192.168.0.0') // IPv4
// { domain: null,
// domainWithoutSuffix: null,
// hostname: '192.168.0.0',
// isIcann: null,
// isIp: true,
Expand All @@ -139,6 +146,7 @@ tldts.parse('https://192.168.0.0') // IPv4

tldts.parse('https://[::1]') // IPv6
// { domain: null,
// domainWithoutSuffix: null,
// hostname: '::1',
// isIcann: null,
// isIp: true,
Expand All @@ -148,6 +156,7 @@ tldts.parse('https://[::1]') // IPv6

tldts.parse('user@emailprovider.co.uk') // email
// { domain: 'emailprovider.co.uk',
// domainWithoutSuffix: 'emailprovider',
// hostname: 'emailprovider.co.uk',
// isIcann: true,
// isIp: false,
Expand All @@ -156,15 +165,16 @@ tldts.parse('[email protected]') // email
// subdomain: '' }
```

| Property Name | Type | Description |
|:-------------- |:------ |:------------------------------------------- |
| `hostname` | `str` | `hostname` of the input extracted automatically |
| `domain` | `str` | Domain (tld + sld) |
| `subdomain` | `str` | Sub domain (what comes after `domain`) |
| `publicSuffix` | `str` | Public Suffix (tld) of `hostname` |
| `isIcann` | `bool` | Does TLD come from ICANN part of the list |
| `isPrivate` | `bool` | Does TLD come from Private part of the list |
| `isIP` | `bool` | Is `hostname` an IP address? |
| Property Name | Type | Description |
|:--------------------- |:------ |:----------------------------------------------- |
| `hostname` | `str` | `hostname` of the input extracted automatically |
| `domain` | `str` | Domain (tld + sld) |
| `domainWithoutSuffix` | `str` | Domain without public suffix |
| `subdomain` | `str` | Subdomain (what comes before `domain`) |
| `publicSuffix` | `str` | Public Suffix (tld) of `hostname` |
| `isIcann` | `bool` | Does TLD come from ICANN part of the list |
| `isPrivate` | `bool` | Does TLD come from Private part of the list |
| `isIp` | `bool` | Is `hostname` an IP address? |


## Single purpose methods
Expand Down Expand Up @@ -204,6 +214,22 @@ getDomain('fr.t.co'); // returns `t.co`
getDomain('https://user:password@example.co.uk:8080/some/path?and&query#hash'); // returns `example.co.uk`
```

### getDomainWithoutSuffix(url | hostname, options?)

Returns the domain (as returned by `getDomain(...)`) without the public suffix part.

```javascript
const { getDomainWithoutSuffix } = require('tldts');

getDomainWithoutSuffix('google.com'); // returns `google`
getDomainWithoutSuffix('fr.google.com'); // returns `google`
getDomainWithoutSuffix('fr.google.google'); // returns `google`
getDomainWithoutSuffix('foo.google.co.uk'); // returns `google`
getDomainWithoutSuffix('t.co'); // returns `t`
getDomainWithoutSuffix('fr.t.co'); // returns `t`
getDomainWithoutSuffix('https://user:password@example.co.uk:8080/some/path?and&query#hash'); // returns `example`
```

### getSubdomain(url | hostname, options?)

Returns the complete subdomain for a given string.
Expand Down
7 changes: 7 additions & 0 deletions packages/tldts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,10 @@ export function getHostname(
): string | null {
return parseImpl(url, FLAG.HOSTNAME, suffixLookup, options).hostname;
}

/**
 * Return the domain of `url` with its public suffix removed, or `null` when
 * the parser produced no such value.
 */
export function getDomainWithoutSuffix(
  url: string,
  options?: Partial<IOptions>,
): string | null {
  // Request every parsing step and keep only the field of interest.
  const parsed = parseImpl(url, FLAG.ALL, suffixLookup, options);
  return parsed.domainWithoutSuffix;
}
2 changes: 1 addition & 1 deletion packages/tldts/src/data/trie.ts

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion publicsuffix

0 comments on commit 4e1d859

Please sign in to comment.