Skip to content

Commit

Permalink
feat: parse episode
Browse files Browse the repository at this point in the history
  • Loading branch information
yjl9903 committed Apr 12, 2023
1 parent 3350c23 commit 5ec08b7
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 53 deletions.
4 changes: 4 additions & 0 deletions packages/anitomy/src/parser/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ export function hasResult(context: ParserContext, category: ElementCategory) {
const value = context.result[category];
return value !== undefined && value !== null && value !== '';
}

/**
 * Read the value currently stored for `category` in the parse result.
 *
 * No emptiness check is performed: whatever `context.result` holds under
 * that key is returned as-is.
 */
export function getResult(context: ParserContext, category: ElementCategory) {
  const { result } = context;
  return result[category];
}
179 changes: 179 additions & 0 deletions packages/anitomy/src/parser/episode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import { ElementCategory } from '../element';
import { Token, TokenCategory } from '../token';
import { isNumericString, trim } from '../utils';

import { ParserContext, getResult, hasResult, setResult } from './context';
import { isDigit, isValidEpisodeNumber } from './number';

/**
 * Try the known episode patterns against `word` and record the first one
 * that matches.
 *
 * Purely numeric words are rejected up front, because every pattern handled
 * here contains at least one non-digit character.
 *
 * @param context parser state that receives the extracted elements
 * @param word    candidate text; it is trimmed with `[' ', '-']` before matching
 * @param token   token the word came from; passed on to the pattern matchers
 * @returns `true` when one of the patterns matched, `false` otherwise
 */
export function matchEpisodePatterns(context: ParserContext, word: string, token: Token) {
  if (isNumericString(word)) return false;

  const candidate = trim(word, [' ', '-']);
  // Nothing left to inspect once the word has been trimmed away
  if (candidate.length === 0) return false;

  const numericFront = isDigit(candidate[0]);
  const numericBack = isDigit(candidate[candidate.length - 1]);

  if (numericFront && numericBack) {
    // e.g. "01v2"
    if (matchSingleEpisodePattern(context, candidate, token)) {
      return true;
    }

    // e.g. "01-02", "03-05v2"
    if (matchMultiEpisodePattern(context, candidate, token)) {
      return true;
    }

    // e.g. "07.5"
    if (matchFractionalEpisodePattern(context, candidate, token)) {
      return true;
    }
  }

  if (numericBack) {
    // e.g. "2x01", "S01E03", "S01-02xE001-150"
    if (matchSeasonAndEpisodePattern(context, candidate, token)) {
      return true;
    }

    // e.g. "#01", "#02-03v2"
    if (matchNumberSignPattern(context, candidate, token)) {
      return true;
    }
  }

  // TODO: patterns for words that do not start with a digit
  // TODO: patterns for words that start with a digit

  // No pattern matched. Reporting failure lets callers fall back to treating
  // the word as a plain episode number instead of silently dropping it.
  return false;
}

/**
 * Recognise one episode number immediately followed by a release version,
 * e.g. "01v2".
 *
 * On a match the episode number is stored without validation and the digit
 * after the "v" is stored as the release version.
 */
function matchSingleEpisodePattern(context: ParserContext, word: string, token: Token) {
  const found = /^(\d{1,3})[vV](\d)$/.exec(word);
  if (found === null) return false;

  const [, episode, version] = found;
  setEpisodeNumber(context, episode, token, false);
  setResult(context, ElementCategory.ReleaseVersion, version);
  return true;
}

/**
 * Recognise an episode range such as "01-02" or "03-05v2".
 *
 * Either bound may carry its own "vN" release version suffix. The range is
 * accepted only when the first number is strictly smaller than the second.
 */
function matchMultiEpisodePattern(context: ParserContext, word: string, token: Token) {
  const found = /^(\d{1,3})(?:[vV](\d))?[-~&+](\d{1,3})(?:[vV](\d))?$/.exec(word);
  if (found === null) return false;

  const [, first, firstVersion, last, lastVersion] = found;

  // Expressions such as "009-1" or "5-2" are not episode ranges
  if (Number(first) >= Number(last)) return false;

  // The range is only accepted when its first bound is a valid episode number
  if (!setEpisodeNumber(context, first, token, true)) return false;
  setEpisodeNumber(context, last, token, true);

  // Record whichever release versions were present, first bound then second
  for (const version of [firstVersion, lastVersion]) {
    if (version) {
      setResult(context, ElementCategory.ReleaseVersion, version);
    }
  }

  return true;
}

/**
 * Match fractional episodes, e.g. "07.5".
 *
 * Only a ".5" fraction is recognised. The whole word, fraction included, is
 * stored as the episode number after validation.
 *
 * @returns `true` when the word matched and the number was stored, otherwise
 *          `false` (always a boolean, like the sibling pattern matchers)
 */
function matchFractionalEpisodePattern(
  context: ParserContext,
  word: string,
  token: Token
): boolean {
  if (!/^\d+\.5$/.test(word)) return false;
  return setEpisodeNumber(context, word, token, true);
}

/**
 * Match season and episode patterns, e.g. "2x01", "S01E03", "S01-02xE001-150".
 *
 * Capture groups: (1) season, (2) optional second season of a range,
 * (3) episode, (4) optional second episode of a range.
 *
 * @returns `true` when the word matched and the season/episode were stored
 */
function matchSeasonAndEpisodePattern(context: ParserContext, word: string, token: Token) {
  // The separator class lists its hyphen last so that it is a literal "-".
  // Written as "[ ._-x]" it would form the range "_".."x", which accepts
  // letters such as "a" as a separator and rejects "-" itself.
  const RE = /^S?(\d{1,2})(?:-S?(\d{1,2}))?(?:x|[ ._x-]?E)(\d{1,3})(?:-E?(\d{1,3}))?$/;
  const match = RE.exec(word);
  if (!match) return false;

  setResult(context, ElementCategory.AnimeSeason, match[1]);
  if (match[2]) {
    setResult(context, ElementCategory.AnimeSeason, match[2]);
  }

  // Episode numbers that follow an explicit season marker are not validated
  setEpisodeNumber(context, match[3], token, false);
  if (match[4]) {
    setEpisodeNumber(context, match[4], token, false);
  }

  return true;
}

/**
 * Recognise episodes introduced by a number sign, e.g. "#01", "#02-03v2".
 *
 * The first number must be a valid episode number. An optional second number
 * is stored without validation, and an optional "vN" suffix is stored as the
 * release version.
 */
function matchNumberSignPattern(context: ParserContext, word: string, token: Token) {
  // Words that do not start with "#" can never match
  if (word[0] !== '#') return false;

  const found = /^#(\d{1,3})(?:[-~&+](\d{1,3}))?(?:[vV](\d))?$/.exec(word);
  if (found === null) return false;

  const [, first, second, version] = found;
  if (!setEpisodeNumber(context, first, token, true)) return false;

  if (second) {
    setEpisodeNumber(context, second, token, false);
  }
  if (version) {
    setResult(context, ElementCategory.ReleaseVersion, version);
  }

  return true;
}

/**
 * Store `num` as the episode number, or as the alternative episode number
 * when one is already known.
 *
 * The token is marked as an identifier as soon as validation has passed,
 * even when the number is then rejected as a duplicate.
 *
 * @param validate when `true`, `num` must pass `isValidEpisodeNumber`
 * @returns `true` when a number was stored, `false` when validation failed or
 *          the number was not stored
 */
export function setEpisodeNumber(
  context: ParserContext,
  num: string,
  token: Token,
  validate: boolean
) {
  if (validate && !isValidEpisodeNumber(num)) return false;

  token.category = TokenCategory.Identifier;

  const hasEarlierNumber =
    context.isEpisodeKeywordsFound && hasResult(context, ElementCategory.EpisodeNumber);
  if (!hasEarlierNumber) {
    setResult(context, ElementCategory.EpisodeNumber, num);
    return true;
  }

  const previous = getResult(context, ElementCategory.EpisodeNumber)!;
  const delta = Number(num) - Number(previous);

  if (delta > 0) {
    // The larger number becomes the alternative one
    setResult(context, ElementCategory.EpisodeNumberAlt, num);
    return true;
  }

  if (delta < 0) {
    // The new number is smaller: it takes over as the episode number and
    // the previous one moves to the alternative slot
    setResult(context, ElementCategory.EpisodeNumber, num);
    setResult(context, ElementCategory.EpisodeNumberAlt, previous);
    return true;
  }

  // Same number (or not comparable): nothing is stored
  return false;
}
21 changes: 14 additions & 7 deletions packages/anitomy/src/parser/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ import {
isElementCategorySingular,
isResolution
} from './utils';
import { indexOfDigit } from './number';
import { ParserContext, hasResult, setResult } from './context';
import { checkAnimeSeasonKeyword, checkExtentKeyword, indexOfDigit } from './parser';
import { checkAnimeSeasonKeyword, checkExtentKeyword, isTokenIsolated } from './parser';

export function parse(result: ParsedResult, tokens: Token[], options: AnitomyOptions) {
const context: ParserContext = {
Expand Down Expand Up @@ -69,7 +70,7 @@ function searchForKeywords(context: ParserContext) {
if (!isElementCategorySearchable(category) || !found.searchable) {
continue;
}
if (isElementCategorySingular(category)) {
if (isElementCategorySingular(category) && !hasResult(context, category)) {
continue;
}

Expand Down Expand Up @@ -108,12 +109,18 @@ function searchForKeywords(context: ParserContext) {
}

function searchForIsolatedNumbers(context: ParserContext) {
const tokens = context.tokens.filter(
(t) => t.category === TokenCategory.Unknown && indexOfDigit(t.content) !== -1
);
if (tokens.length === 0) return;
for (let i = 0; i < context.tokens.length; i++) {
const token = context.tokens[i];
if (
token.category !== TokenCategory.Unknown ||
!isNumericString(token.content) ||
!isTokenIsolated(context, i)
) {
continue;
}

context.isEpisodeKeywordsFound = hasResult(context, ElementCategory.EpisodeNumber);
const num = +token.content;
}
}

// Stub: the body is empty, so calling this currently has no effect.
function searchForEpisodeNumber(context: ParserContext) {}
Expand Down
75 changes: 40 additions & 35 deletions packages/anitomy/src/parser/number.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,61 @@
import { KeywordManager } from '../keyword';
import { isNumericString } from '../utils';
import { ElementCategory } from '../element';
import { Token, TokenCategory } from '../token';

import { indexOfDigit } from './parser';

const AnimeYearMin = 1900;
const AnimeYearMax = 2100;
const EpisodeNumberMax = AnimeYearMax - 1;
const VolumeNumberMax = 50;

export function searchForEpisodePatterns(tokens: Token[]) {
for (const token of tokens) {
const numericFront = token.content.length > 0 && /0-9/.test(token.content[0]);

if (!numericFront) {
} else {
/**
 * Find the position of the first character of `str` that `isDigit` accepts.
 *
 * @returns the zero-based index of that character, or `-1` when there is none
 */
export function indexOfDigit(str: string) {
  return str.split('').findIndex((ch) => isDigit(ch));
}

function numberComesAfterPrefix(category: ElementCategory, token: Token) {
const numberBegin = indexOfDigit(token.content);
const prefix = KeywordManager.normalize(token.content.slice(0, numberBegin));
if (KeywordManager.contains(category, prefix)) return undefined;

const number = token.content.slice(numberBegin);
switch (category) {
case ElementCategory.EpisodePrefix:
if (!matchEpisodePatterns(number, token)) {
}
return true;
case ElementCategory.VolumePrefix:
return true;
default:
return undefined;
}
/**
 * Check whether `str` is exactly one ASCII digit ("0" to "9").
 *
 * Empty strings and strings longer than one character are rejected.
 */
export function isDigit(str: string) {
  const singleDigit = /^[0-9]$/;
  return singleDigit.test(str);
}

function matchEpisodePatterns(word: string, token: Token) {
if (isNumericString(word)) return false;
return true;
}
// export function searchForEpisodePatterns(tokens: Token[]) {
// for (const token of tokens) {
// const numericFront = token.content.length > 0 && /0-9/.test(token.content[0]);

function getEpisodeNumber(num: string, token: Token, validate: boolean) {
if (validate && !isValidEpisodeNumber(num)) return false;
// if (!numericFront) {
// } else {
// }
// }
// }

token.category = TokenCategory.Identifier;
const category = ElementCategory.EpisodeNumber;
}
// function numberComesAfterPrefix(category: ElementCategory, token: Token) {
// const numberBegin = indexOfDigit(token.content);
// const prefix = KeywordManager.normalize(token.content.slice(0, numberBegin));
// if (KeywordManager.contains(category, prefix)) return undefined;

// const number = token.content.slice(numberBegin);
// switch (category) {
// case ElementCategory.EpisodePrefix:
// if (!matchEpisodePatterns(number, token)) {
// }
// return true;
// case ElementCategory.VolumePrefix:
// return true;
// default:
// return undefined;
// }
// }

// function getEpisodeNumber(num: string, token: Token, validate: boolean) {
// if (validate && !isValidEpisodeNumber(num)) return false;

// token.category = TokenCategory.Identifier;
// const category = ElementCategory.EpisodeNumber;
// }

function isValidEpisodeNumber(num: string) {
export function isValidEpisodeNumber(num: string) {
const temp = [];
for (let i = 0; i < num.length && /0-9/.test(num[i]); i++) {
temp.push(num[i]);
Expand Down
25 changes: 14 additions & 11 deletions packages/anitomy/src/parser/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import { ElementCategory } from '../element';
import { inRange, isNumericString } from '../utils';
import { Token, TokenCategory, TokenFlag, findNextToken, findPrevToken } from '../token';

import { indexOfDigit } from './number';
import { ParserContext, setResult } from './context';
import { matchVolumePatterns, setVolumeNumber } from './volume';
import { matchEpisodePatterns, setEpisodeNumber } from './episode';
import { getNumberFromOrdinal, isMatchTokenCategory } from './utils';

export function checkAnimeSeasonKeyword(context: ParserContext, position: number) {
Expand Down Expand Up @@ -44,29 +47,29 @@ export function checkExtentKeyword(
const token = tokens[position];
const nextToken = findNextToken(tokens, position, TokenFlag.NotDelimiter);
if (!isMatchTokenCategory(TokenCategory.Unknown, tokens[nextToken])) {
return undefined;
return false;
}
if (indexOfDigit(tokens[nextToken].content) !== 0) {
return undefined;
return false;
}

switch (category) {
case ElementCategory.EpisodeNumber:
if (!matchEpisodePatterns(context, tokens[nextToken].content, tokens[nextToken])) {
setEpisodeNumber(context, tokens[nextToken].content, tokens[nextToken], false);
}
break;
case ElementCategory.VolumeNumber:
if (!matchVolumePatterns(context, tokens[nextToken].content, tokens[nextToken])) {
setVolumeNumber(context, tokens[nextToken].content, tokens[nextToken], false);
}
break;
}

token.category = TokenCategory.Identifier;
return true;
}

export function indexOfDigit(str: string) {
for (let i = 0; i < str.length; i++) {
if (/[0-9]/.test(str[i])) {
return i;
}
}
return -1;
/**
 * Stub: always returns `false` and reads neither `context` nor `position`.
 *
 * NOTE(review): presumably meant to report whether the token at `position`
 * stands alone among its neighbours; confirm the intended rule before
 * relying on the result.
 */
export function isTokenIsolated(context: ParserContext, position: number) {
return false;
}

function matchEpisodePatterns(word: string, token: Token) {}
16 changes: 16 additions & 0 deletions packages/anitomy/src/parser/volume.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { Token } from '../token';

import { ParserContext } from './context';

/**
 * Stub: always returns `true` and does not inspect `word` or touch
 * `context` / `token`.
 *
 * NOTE(review): presumably the volume counterpart of the episode pattern
 * matcher. While it always reports a match, a caller that only falls back to
 * `setVolumeNumber` on `false` will never take that fallback.
 */
export function matchVolumePatterns(context: ParserContext, word: string, token: Token) {
return true;
}

/**
 * Stub: always returns `true` without storing anything; none of the
 * arguments are read.
 *
 * NOTE(review): presumably meant to record `word` as the volume number,
 * optionally validating it first; confirm once implemented.
 */
export function setVolumeNumber(
context: ParserContext,
word: string,
token: Token,
validate: boolean
) {
return true;
}

0 comments on commit 5ec08b7

Please sign in to comment.