Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OSSFuzz Initial Integration #1198

Merged
merged 2 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions fuzzing/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# OSS-Fuzz build script: install dateparser, compile every fuzz harness found
# under fuzzing/, and package the seed corpus next to the built fuzzers.

# Stop here if the checkout is missing; otherwise pip would install whatever
# happens to live in the current directory.
cd "$SRC"/dateparser || exit 1
pip3 install .

# Build fuzzers in $OUT
for fuzzer in $(find fuzzing -name '*_fuzzer.py');do
  compile_python_fuzzer "$fuzzer"
done

# Seed corpus archive, named after the harness it accompanies.
# NOTE(review): this reads seeds from $SRC/corpus, not from fuzzing/corpus in
# the checkout -- presumably the Dockerfile copies them there; confirm.
zip -q "$OUT"/dateparser_fuzzer_seed_corpus.zip "$SRC"/corpus/*
1 change: 1 addition & 0 deletions fuzzing/corpus/current
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
now EST
1 change: 1 addition & 0 deletions fuzzing/corpus/date_time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
January 12, 2012 10:00 PM
1 change: 1 addition & 0 deletions fuzzing/corpus/french
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Le 11 Décembre 2014 à 09:00
1 change: 1 addition & 0 deletions fuzzing/corpus/minutes_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 minutes ago
1 change: 1 addition & 0 deletions fuzzing/corpus/russian
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
13 января 2015 г. в 13:34
1 change: 1 addition & 0 deletions fuzzing/corpus/thai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 เดือนตุลาคม 2005, 1:00 AM
1 change: 1 addition & 0 deletions fuzzing/corpus/time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
10:00 am
1 change: 1 addition & 0 deletions fuzzing/corpus/time_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 hours ago -0500
1 change: 1 addition & 0 deletions fuzzing/corpus/turkish
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
yaklaşık 23 saat önce
114 changes: 114 additions & 0 deletions fuzzing/dateparser_fuzzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import sys
from typing import List

import atheris
from fuzz_helpers import EnhancedFuzzedDataProvider

with atheris.instrument_imports():
import dateparser

import re

import pytz

import dateparser.data
import dateparser.parser

# Value pools the harness draws dateparser arguments and settings from.
# Element order matters: the data provider selects entries positionally, so
# reordering a list changes what an existing corpus input decodes to.

# Language ordering exported by dateparser's language data.
language_codes = dateparser.data.languages_info.language_order

# Format directives that fuzzed date-format strings are assembled from.
directives = [
    "%a", "%A", "%w", "%d",
    "%b", "%B", "%m",
    "%y", "%Y",
    "%H", "%I", "%p", "%M", "%S", "%f",
    "%z", "%Z",
    "%j", "%U", "%W",
    "%c", "%x", "%X",
    "%%",
    "%G", "%u", "%V",
    "%:Z",
]

# Locale codes offered to the ``locales`` argument of ``dateparser.parse``.
locale_codes = ["fr-PF", "qu-EC", "af-NA"]

# Candidate values for the DATE_ORDER setting.
date_order = [*dateparser.parser.date_order_chart.keys()]

# Candidate values for the TIMEZONE and TO_TIMEZONE settings.
timezone = [*pytz.all_timezones]

# Candidate values for PREFER_MONTH_OF_YEAR and PREFER_DAY_OF_MONTH.
preferred_date = ["last", "first", "current"]

# Candidate values for PREFER_DATES_FROM.
preferred_dates_from = ["past", "future", "current_period"]

# Parser names the PARSERS setting is sampled from.
parsers = [
    "timestamp",
    "negative-timestamp",
    "relative-time",
    "custom-formats",
    "absolute-time",
    "no-spaces-time",
]


def _get_format_strings(fdp: EnhancedFuzzedDataProvider) -> List[str]:
    """Build between zero and five fuzzed date-format strings.

    Each string is a fuzzer-selected sub-list of ``directives`` joined with a
    separator obtained from ``fdp.ConsumeString(1)``.
    """
    # The count is consumed once, up front; each iteration then consumes its
    # separator before its directive selection.
    return [
        fdp.ConsumeString(1).join(fdp.ConsumeSublist(directives))
        for _ in range(fdp.ConsumeIntInRange(0, 5))
    ]


def TestOneInput(data):
    """Fuzz entry point: drive one ``dateparser.parse`` call from ``data``.

    The raw fuzzer bytes are decoded into a settings dictionary and then into
    the arguments of ``dateparser.parse``.

    Returns -1 when the call raises ``re.error``; otherwise returns None.
    Any other exception propagates to the caller.
    """
    fdp = EnhancedFuzzedDataProvider(data)

    # Values are consumed from the provider in key order, so the order of
    # entries below is part of how an input is decoded.
    settings = {
        "DATE_ORDER": fdp.PickValueInList(date_order),
        "PREFER_LOCALE_DATE_ORDER": fdp.ConsumeBool(),
        "TIMEZONE": fdp.PickValueInList(timezone),
        "TO_TIMEZONE": fdp.PickValueInList(timezone),
        "RETURN_AS_TIMEZONE_AWARE": fdp.ConsumeBool(),
        "PREFER_MONTH_OF_YEAR": fdp.PickValueInList(preferred_date),
        "PREFER_DAY_OF_MONTH": fdp.PickValueInList(preferred_date),
        "PREFER_DATES_FROM": fdp.PickValueInList(preferred_dates_from),
        "RELATIVE_BASE": fdp.ConsumeDate(),
        "STRICT_PARSING": fdp.ConsumeBool(),
        "REQUIRE_PARTS": [],
        "SKIP_TOKENS": [
            fdp.ConsumeRandomString() for _ in range(fdp.ConsumeIntInRange(0, 3))
        ],
        "NORMALIZE": fdp.ConsumeBool(),
        "RETURN_TIME_AS_PERIOD": fdp.ConsumeBool(),
        "PARSERS": fdp.ConsumeSublist(parsers),
        "DEFAULT_LANGUAGES": fdp.ConsumeSublist(language_codes),
        "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": fdp.ConsumeProbability(),
    }

    try:
        # Arguments are drawn inside the try block and in call order, which
        # mirrors how an inline call expression would evaluate them.
        date_string = fdp.ConsumeRandomString()
        date_formats = _get_format_strings(fdp)
        languages = fdp.ConsumeSublist(language_codes)
        locales = fdp.ConsumeSublist(locale_codes)
        region = fdp.ConsumeString(2)
        dateparser.parse(
            date_string,
            date_formats=date_formats,
            languages=languages,
            locales=locales,
            region=region,
            settings=settings,
        )
    except re.error:
        # NOTE(review): presumably -1 asks atheris not to keep this input in
        # the corpus -- confirm against the atheris documentation.
        return -1


def main():
    """Register ``TestOneInput`` with atheris and start fuzzing.

    The process command line (``sys.argv``) is handed to ``atheris.Setup``.
    """
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()


if __name__ == "__main__":
main()
86 changes: 86 additions & 0 deletions fuzzing/fuzz_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import contextlib
import datetime
import io
import tempfile
from typing import Iterator, List, TypeVar, Union

import atheris

T = TypeVar("T")


class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
    """``atheris.FuzzedDataProvider`` extended with convenience consumers.

    Every method draws its result from the fuzzer-supplied byte stream, so
    the order in which methods are called determines how an input is decoded.
    """

    def ConsumeRandomBytes(self) -> bytes:
        """Consume a fuzzer-chosen number of bytes, from 0 up to all remaining."""
        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))

    def ConsumeRandomString(self) -> str:
        """Consume a surrogate-free string whose length bound is fuzzer-chosen."""
        return self.ConsumeUnicodeNoSurrogates(
            self.ConsumeIntInRange(0, self.remaining_bytes())
        )

    def ConsumeRemainingString(self) -> str:
        """Consume everything that is left as a surrogate-free string."""
        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())

    def ConsumeRemainingBytes(self) -> bytes:
        """Consume everything that is left as raw bytes."""
        return self.ConsumeBytes(self.remaining_bytes())

    def ConsumeSublist(self, source: List[T]) -> List[T]:
        """
        Returns a shuffled sub-list of the given list of len [1, len(source)]

        One boolean is consumed per element of ``source`` to decide whether it
        is kept. If nothing is kept, a single element picked from ``source``
        is returned instead.
        """
        chosen = [elem for elem in source if self.ConsumeBool()]

        # Fisher-Yates shuffle driven by fuzzer data. The loop has to reach
        # index 1 (stop value 0): stopping at 2 would never exchange the
        # first two positions and would leave a two-element list unshuffled.
        for i in range(len(chosen) - 1, 0, -1):
            j = self.ConsumeIntInRange(0, i)
            chosen[i], chosen[j] = chosen[j], chosen[i]

        return chosen or [self.PickValueInList(source)]

    def ConsumeDate(self) -> datetime.datetime:
        """Consume a float and interpret it as a timestamp.

        Returns ``datetime.datetime(1970, 1, 1)`` when the float cannot be
        converted (``OverflowError``, ``OSError`` or ``ValueError``).
        """
        try:
            return datetime.datetime.fromtimestamp(self.ConsumeFloat())
        except (OverflowError, OSError, ValueError):
            return datetime.datetime(year=1970, month=1, day=1)

    def _consume_file_data(self, all_data: bool, as_bytes: bool) -> Union[bytes, str]:
        """Consume the payload for a fuzzed file.

        ``all_data`` selects between consuming everything that remains and a
        fuzzer-chosen amount; ``as_bytes`` selects bytes versus str.
        """
        if all_data:
            if as_bytes:
                return self.ConsumeRemainingBytes()
            return self.ConsumeRemainingString()
        if as_bytes:
            return self.ConsumeRandomBytes()
        return self.ConsumeRandomString()

    @contextlib.contextmanager
    def ConsumeMemoryFile(
        self, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[Union[io.BytesIO, io.StringIO]]:
        """Context manager yielding an in-memory file filled with fuzzed data.

        Yields an ``io.BytesIO`` when ``as_bytes`` is true, otherwise an
        ``io.StringIO``. The file is closed when the ``with`` block exits,
        including when the block raises.
        """
        file_data = self._consume_file_data(all_data, as_bytes)
        file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
        try:
            yield file
        finally:
            # Runs on the error path too, so the buffer is never leaked.
            file.close()

    @contextlib.contextmanager
    def ConsumeTemporaryFile(
        self, suffix: str, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[str]:
        """Context manager yielding the path of a temp file with fuzzed data.

        The file is created with ``tempfile.NamedTemporaryFile`` using the
        given ``suffix`` and is closed when the ``with`` block exits,
        including when the block raises.
        """
        file_data = self._consume_file_data(all_data, as_bytes)

        mode = "w+b" if as_bytes else "w+"
        # The with-statement guarantees the temporary file is closed even if
        # the caller's block raises.
        with tempfile.NamedTemporaryFile(mode=mode, suffix=suffix) as tfile:
            tfile.write(file_data)
            tfile.seek(0)
            # Push buffered data to disk before handing out the path.
            tfile.flush()
            yield tfile.name
1 change: 1 addition & 0 deletions fuzzing/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
atheris
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ envlist = flake8, py3
deps =
-rdateparser_scripts/requirements.txt
-rtests/requirements.txt
-rfuzzing/requirements.txt

[testenv]
deps =
Expand Down