diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 00000000..24e9459b --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,32 @@ +name: Tests + +on: + - push + - pull_request + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + + steps: + - name: Set git to use LF + run: | + git config --global core.autocrlf false + git config --global core.eol lf + + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox diff --git a/MANIFEST.in b/MANIFEST.in index da03ba19..12075063 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.md include requirements.txt +include requirements-dev.txt include scienceworld/scienceworld-*.jar include scienceworld/object_type_ids.tsv diff --git a/examples/human.py b/examples/human.py index f2460dee..067a7ef5 100644 --- a/examples/human.py +++ b/examples/human.py @@ -12,7 +12,7 @@ def userConsole(args): simplificationStr = args['simplification_str'] # Initialize environment - env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit']) + env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit']) taskNames = env.getTaskNames() print("Task Names: " + str(taskNames)) @@ -22,50 +22,49 @@ def userConsole(args): print("Starting Task " + str(taskIdx) + ": " + taskName) time.sleep(2) - # Reset the environment initialObs, initialDict = env.reset() - - # # Examples of how to access much of the environment information that the API exposes. 
# (Many of these are similar to the Jericho API) # print("Task Names: " + str(taskNames)) - print("Possible actions: " + str(env.getPossibleActions()) ) - print("Possible objects: " + str(env.getPossibleObjects()) ) + print("Possible actions: " + str(env.getPossibleActions())) + print("Possible objects: " + str(env.getPossibleObjects())) templates, lut = env.getPossibleActionObjectCombinations() print("Possible action/object combinations: " + str(templates)) - #print("Object IDX to Object Referent LUT: " + str(lut)) - print("Vocabulary: " + str(env.getVocabulary()) ) + # print("Object IDX to Object Referent LUT: " + str(lut)) + print("Vocabulary: " + str(env.getVocabulary())) print("Possible actions (with IDs): " + str(env.getPossibleActionsWithIDs())) print("Possible object types: " + str(env.getObjectTypes())) print("Object IDX to Object Referent LUT: " + str(lut)) print("\n") print("Possible object referents LUT: " + str(env.getPossibleObjectReferentLUT())) print("\n") - print("Valid action-object combinations: " + str(env.getValidActionObjectCombinations())) + print("Valid action-object combinations: " + + str(env.getValidActionObjectCombinations())) print("\n") print("Object_ids to type_ids: " + str(env.getAllObjectTypesLUTJSON())) print("\n") - print("All objects, their ids, types, and referents: " + str(env.getAllObjectIdsTypesReferentsLUTJSON() )) + print("All objects, their ids, types, and referents: " + + str(env.getAllObjectIdsTypesReferentsLUTJSON())) print("\n") - print("Valid action-object combinations (with templates): " + str(env.getValidActionObjectCombinationsWithTemplates() )) + print("Valid action-object combinations (with templates): " + + str(env.getValidActionObjectCombinationsWithTemplates())) print("\n") print("Object Type LUT: " + str(env.getPossibleObjectReferentTypesLUT())) - print("Variations (train): " + str(env.getVariationsTrain() )) + print("Variations (train): " + str(env.getVariationsTrain())) print("") 
print("----------------------------------------------------------------------------------") print("") - print("Gold Path:" + str(env.getGoldActionSequence())) print("Task Name: " + taskName) print("Variation: " + str(args['var_num']) + " / " + str(env.getMaxVariations(taskName))) - print("Task Description: " + str(env.getTaskDescription()) ) + print("Task Description: " + str(env.getTaskDescription())) # # Main user input loop @@ -97,9 +96,10 @@ def userConsole(args): print("Reward: " + str(reward)) print("Score: " + str(score)) print("isCompleted: " + str(isCompleted)) - #print("info: " + str(info)) + # print("info: " + str(info)) - print("'help' lists valid action templates, 'objects' lists valid objects, 'valid' lists valid action-object combinations (long!). ") + print("'help' lists valid action templates, 'objects' lists valid" + + " objects, 'valid' lists valid action-object combinations (long!). ") print("'goals' lists progress on subgoals.") print("type 'exit' to quit.") @@ -108,7 +108,6 @@ def userConsole(args): # Sanitize input userInputStr = userInputStr.lower().strip() - # Display run history runHistory = env.getRunHistory() print("Run History:") @@ -143,9 +142,9 @@ def build_simplification_str(args): return args["simplifications_preset"] or ",".join(simplifications) -# # Parse command line arguments -# + + def parse_args(): desc = "Play through a game using the console." parser = argparse.ArgumentParser(desc) diff --git a/examples/random_agent.py b/examples/random_agent.py index 7f175189..1bbf38f5 100644 --- a/examples/random_agent.py +++ b/examples/random_agent.py @@ -9,7 +9,6 @@ def randomModel(args): """ Example random agent -- randomly picks an action at each step. 
""" exitCommands = ["quit", "exit"] - taskIdx = args['task_num'] simplificationStr = args['simplification_str'] numEpisodes = args['num_episodes'] @@ -18,14 +17,15 @@ def randomModel(args): finalScores = [] # Initialize environment - env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit']) + env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit']) taskNames = env.getTaskNames() print("Task Names: " + str(taskNames)) # Choose task taskName = taskNames[taskIdx] # Just get first task - env.load(taskName, 0, "") # Load the task, so we have access to some extra accessors e.g. getRandomVariationTrain() ) + # Load the task, so we have access to some extra accessors e.g. get_random_variation_train() + env.load(taskName, 0, "") maxVariations = env.getMaxVariations(taskName) print("Starting Task " + str(taskIdx) + ": " + taskName) time.sleep(2) @@ -40,18 +40,17 @@ def randomModel(args): initialObs, initialDict = env.reset() # Example accessors - print("Possible actions: " + str(env.getPossibleActions()) ) - print("Possible objects: " + str(env.getPossibleObjects()) ) + print("Possible actions: " + str(env.getPossibleActions())) + print("Possible objects: " + str(env.getPossibleObjects())) templates, lut = env.getPossibleActionObjectCombinations() print("Possible action/object combinations: " + str(templates)) print("Object IDX to Object Referent LUT: " + str(lut)) print("Task Name: " + taskName) print("Task Variation: " + str(randVariationIdx) + " / " + str(maxVariations)) - print("Task Description: " + str(env.getTaskDescription()) ) - print("look: " + str(env.look()) ) - print("inventory: " + str(env.inventory()) ) - print("taskdescription: " + str(env.taskdescription()) ) - + print("Task Description: " + str(env.getTaskDescription())) + print("look: " + str(env.look())) + print("inventory: " + str(env.inventory())) + print("taskdescription: " + str(env.taskdescription())) score = 0.0 isCompleted = False @@ -59,9 +58,9 @@ 
def randomModel(args): # Run one episode until we reach a stopping condition (including exceeding the maximum steps) userInputStr = "look around" # First action - while (userInputStr not in exitCommands) and (isCompleted == False): + while (userInputStr not in exitCommands) and (isCompleted is False): print("----------------------------------------------------------------") - print ("Step: " + str(curIter)) + print("Step: " + str(curIter)) # Send user input, get response observation, reward, isCompleted, info = env.step(userInputStr) @@ -72,27 +71,27 @@ def randomModel(args): print("Score: " + str(score)) print("isCompleted: " + str(isCompleted)) - # The environment will make isCompleted `True` when a stop condition has happened, or the maximum number of steps is reached. + # The environment will make isCompleted `True` when a stop condition + # has happened, or the maximum number of steps is reached. if (isCompleted): break # Randomly select action - ## Any action (valid or not) - #templates, lut = env.getPossibleActionObjectCombinations() - #print("Possible action/object combinations: " + str(templates)) - #print("Object IDX to Object Referent LUT: " + str(lut)) - #randomTemplate = random.choice( templates ) - #print("Next random action: " + str(randomTemplate)) - #userInputStr = randomTemplate["action"] + # Any action (valid or not) + # templates, lut = env.getPossibleActionObjectCombinations() + # print("Possible action/object combinations: " + str(templates)) + # print("Object IDX to Object Referent LUT: " + str(lut)) + # randomTemplate = random.choice( templates ) + # print("Next random action: " + str(randomTemplate)) + # userInputStr = randomTemplate["action"] - ## Only valid actions + # Only valid actions validActions = env.getValidActionObjectCombinationsWithTemplates() - randomAction = random.choice( validActions ) + randomAction = random.choice(validActions) print("Next random action: " + str(randomAction)) userInputStr = randomAction["action"] - 
print(list(lut.keys())[-1]) # Sanitize input @@ -110,19 +109,20 @@ def randomModel(args): finalScores.append(score) # Report progress of model - print ("Final score: " + str(score)) - print ("isCompleted: " + str(isCompleted)) + print("Final score: " + str(score)) + print("isCompleted: " + str(isCompleted)) # Save history -- and when we reach maxPerFile, export them to file filenameOutPrefix = args['output_path_prefix'] + str(taskIdx) - env.storeRunHistory(episodeIdx, notes = {'text':'my notes here'} ) + env.storeRunHistory(episodeIdx, notes={'text': 'my notes here'}) env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file']) # Episodes are finished -- manually save any last histories still in the buffer env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file'], forceSave=True) - # Show final episode scores to user: - avg = sum([x for x in finalScores if x >=0]) / len(finalScores) # Clip negative scores to 0 for average calculation + # Show final episode scores to user + # Clip negative scores to 0 for average calculation + avg = sum([x for x in finalScores if x >= 0]) / len(finalScores) print("") print("---------------------------------------------------------------------") print(" Summary (Random Agent)") diff --git a/examples/scienceworld-web-server-example.py b/examples/scienceworld-web-server-example.py index ba39b207..b5258afa 100644 --- a/examples/scienceworld-web-server-example.py +++ b/examples/scienceworld-web-server-example.py @@ -27,22 +27,22 @@ def __init__(self): self.out = "" self.title = "" - def setTitle(self, titleStr:str): + def setTitle(self, titleStr: str): self.title = titleStr - def addHeading(self, strIn:str): + def addHeading(self, strIn: str): self.out += "
\n" + strIn + "\n\n" - def addStr(self, strIn:str): + def addStr(self, strIn: str): self.out += strIn + "\n" def getHTML(self): @@ -55,10 +55,8 @@ def getHTML(self): return out -# -# Save JSON history -# -def saveJSONHistory(history:list): + +def saveJSONHistory(history: list): pathOut = "recordings/" if not os.path.isdir(pathOut): os.mkdir(pathOut) @@ -74,9 +72,10 @@ def saveJSONHistory(history:list): dateTimeObj = datetime.now() timestampStr = dateTimeObj.strftime("timestamp%Y-%M-%d-%H-%M-%S") - filenameOut = pathOut + "recording-" + str(taskName) + "-var" + str(varIdx) + "-" + str(result) + "-" + str(timestampStr) + ".json" + filenameOut = (pathOut + "recording-" + str(taskName) + "-var" + str(varIdx) + + "-" + str(result) + "-" + str(timestampStr) + ".json") - print ("Exporting " + filenameOut) + print("Exporting " + filenameOut) with open(filenameOut, "w") as jsonFile: json.dump(history, jsonFile, indent=4, sort_keys=True) @@ -94,10 +93,10 @@ def app(): pywebio.session.set_env(title='ScienceWorld Demo', auto_scroll_bottom=True) # Initialize environment - env = ScienceWorldEnv("", serverPath=None, envStepLimit = 10_000) + env = ScienceWorldEnv("", serverPath=None, envStepLimit=10_000) pywebio_out.put_markdown('## Science World (Text Simulation)') - #put_button("Click here to export transcript", onclick=lambda: , color='success', outline=True) + # put_button("Click here to export transcript", onclick=lambda: , color='success', outline=True) htmlLog.addHeading("Science World (Text Simulation)") htmlLog.addHorizontalRule() @@ -105,18 +104,18 @@ def app(): taskName = pywebio.input.select("Select a task:", env.getTaskNames()) maxVariations = env.getMaxVariations(taskName) - #variationIdx = slider("Task Variation: ", min_value=0, max_value=(maxVariations-1)) + # variationIdx = slider("Task Variation: ", min_value=0, max_value=(maxVariations-1)) variationIdx = pywebio.input.input('Enter the task variation (min = 0, max = ' + str(maxVariations) + "):") variationIdx = 
int(variationIdx) if variationIdx.isdigit() else 0 # Load environment env.load(taskName, variationIdx, simplificationStr) initialObs, initialDict = env.reset() - #time.sleep(1) + # time.sleep(1) - #print("Possible actions: " + str(env.getPossibleActions()) ) - #print("Possible objects: " + str(env.getPossibleObjects()) ) - #print("Possible action/object combinations: " + str(env.getPossibleActionObjectCombinations())) + # print("Possible actions: " + str(env.getPossibleActions()) ) + # print("Possible objects: " + str(env.getPossibleObjects()) ) + # print("Possible action/object combinations: " + str(env.getPossibleActionObjectCombinations())) pywebio_out.put_table([ ["Task", env.getTaskDescription()], @@ -132,9 +131,9 @@ def app(): userInputStr = "look around" # First action consoleMoveCount = 0 while (userInputStr not in exitCommands): - #put_markdown("### Move " + str(env.getNumMoves()) ) - #htmlLog.addSubheading("Move " + str(env.getNumMoves())) - pywebio_out.put_markdown("### Move " + str(consoleMoveCount) ) + # put_markdown("### Move " + str(env.getNumMoves()) ) + # htmlLog.addSubheading("Move " + str(env.getNumMoves())) + pywebio_out.put_markdown("### Move " + str(consoleMoveCount)) htmlLog.addSubheading("Move " + str(consoleMoveCount)) # Send user input, get response @@ -152,16 +151,18 @@ def app(): # Output (log) htmlLog.addPreformattedText(observation) if (score >= 1.0): - htmlLog.addStr("Task Score: " + str(score) + " (isCompleted: " + str(isCompleted) + ")