Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new RAI Utils package for common utilities shared across RAI packages #1280

Merged
merged 1 commit into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/CI-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ jobs:
ci-python:
strategy:
matrix:
packageDirectory: ["rai_core_flask", "responsibleai", "erroranalysis"]
packageDirectory:
["rai_core_flask", "responsibleai", "erroranalysis", "raiutils"]
operatingSystem: [ubuntu-latest, macos-latest, windows-latest]
pythonVersion: [3.6, 3.7, 3.8, 3.9]
exclude:
Expand Down
11 changes: 11 additions & 0 deletions raiutils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Responsible AI Utilities for Python

### This package has been tested with Python 3.6, 3.7, 3.8 and 3.9

The Responsible AI Utilities package contains common functions shared across various RAI tools, including fairlearn, interpret-community, responsibleai, raiwidgets and other packages, as well as notebook examples.

Please see the main documentation website:
https://responsibleaitoolbox.ai/

The open source code can be found here:
https://github.com/microsoft/responsible-ai-widgets
9 changes: 9 additions & 0 deletions raiutils/raiutils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

"""Responsible AI Utilities package."""

from .version import name, version

# Expose the package name and version string imported from .version
# as module-level dunder attributes.
__name__ = name
__version__ = version
7 changes: 7 additions & 0 deletions raiutils/raiutils/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

"""Module for defining common utilities across Responsible AI utilities."""
from .retries import retry_function

# Names exported by ``from raiutils.common import *``.
__all__ = ['retry_function']
41 changes: 41 additions & 0 deletions raiutils/raiutils/common/retries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import time


def retry_function(function, action_name, err_msg,
                   max_retries=4, retry_delay=60):
    """Common utility to retry calling a function with exponential backoff.

    The function is called with no arguments. After every failed attempt
    except the last one, this utility sleeps for the current delay and then
    doubles the delay before trying again.

    :param function: The zero-argument callable to call.
    :type function: function
    :param action_name: The name of the action being performed, used as a
        prefix in the progress messages printed to stdout.
    :type action_name: str
    :param err_msg: The message of the RuntimeError raised if every attempt
        fails.
    :type err_msg: str
    :param max_retries: The maximum number of attempts. Must be at least 1.
    :type max_retries: int
    :param retry_delay: The delay in seconds before the first retry; it is
        doubled after each subsequent failure.
    :type retry_delay: int
    :return: The result of the function call. May be None if function
        does not return a result.
    :rtype: object
    :raises ValueError: If max_retries is less than 1.
    :raises RuntimeError: If the function raised an exception on every
        attempt. The last exception is attached as the cause.
    """
    # Without this guard a non-positive max_retries would skip the loop
    # entirely and fail with an unrelated UnboundLocalError.
    if max_retries < 1:
        raise ValueError(
            "max_retries must be at least 1, got {0}".format(max_retries))

    for attempt in range(1, max_retries + 1):
        print("{0} attempt {1} of {2}".format(
            action_name, attempt, max_retries))
        try:
            return function()
        except Exception as e:
            print("{0} attempt failed with exception:".format(action_name))
            print(e)
            if attempt == max_retries:
                # Out of attempts: surface the caller's message but keep
                # the underlying failure as the explicit cause.
                raise RuntimeError(err_msg) from e
            print("Will retry after {0} seconds".format(retry_delay))
            time.sleep(retry_delay)
            retry_delay = retry_delay * 2
7 changes: 7 additions & 0 deletions raiutils/raiutils/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

"""Module for defining common utilities related to datasets."""
from .dataset import fetch_dataset

# Names exported by ``from raiutils.dataset import *``.
__all__ = ['fetch_dataset']
47 changes: 47 additions & 0 deletions raiutils/raiutils/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

from raiutils.common.retries import retry_function

try:
from urllib import urlretrieve
except ImportError:
from urllib.request import urlretrieve


class Retriever(object):
    """Download helper that binds a URL to a local destination file.

    Both values are captured at construction so that ``retrieve`` can be
    called later without any arguments.
    """

    def __init__(self, url, filename):
        """Remember where to download from and where to save to.

        :param url: The URL to retrieve the file from.
        :type url: str
        :param filename: The local filename the download is written to.
        :type filename: str
        """
        self._url, self._filename = url, filename

    def retrieve(self):
        """Download the stored URL into the stored filename."""
        source = self._url
        destination = self._filename
        urlretrieve(source, destination)


def fetch_dataset(url, filename, max_retries=4, retry_delay=60):
    """Download a dataset from a URL to a local file, retrying on failure.

    The download is delegated to ``retry_function``, which is given the
    retry count and delay passed here.

    :param url: The URL to retrieve the dataset from.
    :type url: str
    :param filename: The local filename the dataset is written to.
    :type filename: str
    :param max_retries: The maximum number of retries.
    :type max_retries: int
    :param retry_delay: The delay between retries.
    :type retry_delay: int
    """
    download = Retriever(url, filename).retrieve
    retry_function(
        download,
        "Download",
        "Failed to download dataset",
        max_retries=max_retries,
        retry_delay=retry_delay)
8 changes: 8 additions & 0 deletions raiutils/raiutils/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

# Package name.
name = 'raiutils'

# Individual version components, kept as strings.
_major = '0'
_minor = '0'
_patch = '1'

# Dotted "major.minor.patch" version string.
version = '.'.join((_major, _minor, _patch))
7 changes: 7 additions & 0 deletions raiutils/raiutils/webservice/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

"""Module for defining common utilities related to webservices."""
from .webservice import post_with_retries

# Names exported by ``from raiutils.webservice import *``.
__all__ = ['post_with_retries']
57 changes: 57 additions & 0 deletions raiutils/raiutils/webservice/webservice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import requests

from raiutils.common.retries import retry_function


class Post(object):
    """Bundle the arguments of one POST request behind a no-argument call.

    The URI, payload and headers are captured at construction so that
    ``post`` can be invoked later without any arguments.
    """

    def __init__(self, uri, input_data, headers):
        """Capture the request target, payload and headers.

        :param uri: The URI to send the post request to.
        :type uri: str
        :param input_data: The data to send in the post request.
        :type input_data: dict
        :param headers: The optional headers to send with the post request.
        :type headers: dict
        """
        self._uri, self._input_data, self._headers = uri, input_data, headers

    def post(self):
        """Issue the POST request with the stored arguments.

        :return: The response from the post request.
        :rtype: requests.Response
        """
        response = requests.post(
            self._uri, self._input_data, headers=self._headers)
        return response


def post_with_retries(uri, input_data, headers=None,
                      max_retries=4, retry_delay=60):
    """Post data to a webservice, retrying through ``retry_function``.

    :param uri: The URI to send the post request to.
    :type uri: str
    :param input_data: The data to send in the post request.
    :type input_data: dict
    :param headers: The optional headers to send with the post request.
    :type headers: dict
    :param max_retries: The maximum number of retries.
    :type max_retries: int
    :param retry_delay: The delay between retries.
    :type retry_delay: int
    :return: The response from the post request.
    :rtype: requests.Response
    """
    send = Post(uri, input_data, headers).post
    return retry_function(
        send,
        "Post",
        "Unable to post to web service",
        max_retries=max_retries,
        retry_delay=retry_delay)
8 changes: 8 additions & 0 deletions raiutils/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pytest==5.0.1
pytest-cov
pytest-mock==3.1.1
requests==2.25.1

requirements-parser==0.2.0

pandas>=0.25.1
1 change: 1 addition & 0 deletions raiutils/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests==2.25.1
39 changes: 39 additions & 0 deletions raiutils/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import setuptools

# Execute version.py in this namespace to define ``name`` and ``version``
# by reading the file directly rather than importing the raiutils package.
with open('raiutils/version.py') as f:
    code = compile(f.read(), f.name, 'exec')
exec(code)

# Fetch ReadMe; read as UTF-8 explicitly so the result does not depend on
# the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Use requirements.txt to set the install_requires, skipping blank lines
# and comment lines, which are not valid requirement strings.
with open('requirements.txt') as f:
    stripped_lines = (line.strip() for line in f)
    install_requires = [
        requirement for requirement in stripped_lines
        if requirement and not requirement.startswith('#')
    ]

setuptools.setup(
    name=name,  # noqa: F821
    version=version,  # noqa: F821
    author="Roman Lutz, Ilya Matiach, Ke Xu",
    author_email="[email protected]",
    description="Common basic utilities used across various RAI tools",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/microsoft/responsible-ai-widgets",
    packages=setuptools.find_packages(),
    python_requires='>=3.6',
    install_requires=install_requires,
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Development Status :: 3 - Alpha"
    ]
)
30 changes: 30 additions & 0 deletions raiutils/tests/test_fetch_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import zipfile

import pandas as pd
import pytest

from raiutils.dataset import fetch_dataset


class TestFetchDataset(object):
    """Tests for ``fetch_dataset`` that download from a public blob store."""

    def test_fetch_dataset(self):
        """Download the sample archive, unpack it and check the CSV shape."""
        archive_stem = 'responsibleai.12.28.21'
        archive_name = archive_stem + '.zip'
        base_url = 'https://publictestdatasets.blob.core.windows.net/data/'
        fetch_dataset(base_url + archive_name, archive_name)
        with zipfile.ZipFile(archive_name, 'r') as archive:
            archive.extractall('.')

        n_rows, n_cols = pd.read_csv('adult-train.csv').shape[:2]
        assert n_rows == 32561
        assert n_cols == 15

    def test_fetch_bad_url(self):
        """Fetching the 'bad.zip' URL is expected to raise RuntimeError."""
        bad_url = (
            'https://publictestdatasets.blob.core.windows.net/data/bad.zip')
        with pytest.raises(RuntimeError):
            fetch_dataset(bad_url, 'bad_url.zip',
                          max_retries=2, retry_delay=1)
15 changes: 15 additions & 0 deletions raiutils/tests/test_post.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import pytest

from raiutils.webservice import post_with_retries


class TestPost(object):
    """Tests for ``post_with_retries``."""

    def test_post_bad_uri(self):
        """Posting to 'https://bad_uri' is expected to raise RuntimeError."""
        bad_uri = 'https://bad_uri'
        payload = {}
        with pytest.raises(RuntimeError):
            post_with_retries(
                bad_uri, payload, max_retries=2, retry_delay=1)