diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 00000000..24e9459b --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,32 @@ +name: Tests + +on: + - push + - pull_request + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + + steps: + - name: Set git to use LF + run: | + git config --global core.autocrlf false + git config --global core.eol lf + + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox diff --git a/MANIFEST.in b/MANIFEST.in index da03ba19..12075063 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.md include requirements.txt +include requirements-dev.txt include scienceworld/scienceworld-*.jar include scienceworld/object_type_ids.tsv diff --git a/examples/human.py b/examples/human.py index f2460dee..067a7ef5 100644 --- a/examples/human.py +++ b/examples/human.py @@ -12,7 +12,7 @@ def userConsole(args): simplificationStr = args['simplification_str'] # Initialize environment - env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit']) + env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit']) taskNames = env.getTaskNames() print("Task Names: " + str(taskNames)) @@ -22,50 +22,49 @@ def userConsole(args): print("Starting Task " + str(taskIdx) + ": " + taskName) time.sleep(2) - # Reset the environment initialObs, initialDict = env.reset() - - # # Examples of how to access much of the environment information that the API exposes. 
# (Many of these are similar to the Jericho API) # print("Task Names: " + str(taskNames)) - print("Possible actions: " + str(env.getPossibleActions()) ) - print("Possible objects: " + str(env.getPossibleObjects()) ) + print("Possible actions: " + str(env.getPossibleActions())) + print("Possible objects: " + str(env.getPossibleObjects())) templates, lut = env.getPossibleActionObjectCombinations() print("Possible action/object combinations: " + str(templates)) - #print("Object IDX to Object Referent LUT: " + str(lut)) - print("Vocabulary: " + str(env.getVocabulary()) ) + # print("Object IDX to Object Referent LUT: " + str(lut)) + print("Vocabulary: " + str(env.getVocabulary())) print("Possible actions (with IDs): " + str(env.getPossibleActionsWithIDs())) print("Possible object types: " + str(env.getObjectTypes())) print("Object IDX to Object Referent LUT: " + str(lut)) print("\n") print("Possible object referents LUT: " + str(env.getPossibleObjectReferentLUT())) print("\n") - print("Valid action-object combinations: " + str(env.getValidActionObjectCombinations())) + print("Valid action-object combinations: " + + str(env.getValidActionObjectCombinations())) print("\n") print("Object_ids to type_ids: " + str(env.getAllObjectTypesLUTJSON())) print("\n") - print("All objects, their ids, types, and referents: " + str(env.getAllObjectIdsTypesReferentsLUTJSON() )) + print("All objects, their ids, types, and referents: " + + str(env.getAllObjectIdsTypesReferentsLUTJSON())) print("\n") - print("Valid action-object combinations (with templates): " + str(env.getValidActionObjectCombinationsWithTemplates() )) + print("Valid action-object combinations (with templates): " + + str(env.getValidActionObjectCombinationsWithTemplates())) print("\n") print("Object Type LUT: " + str(env.getPossibleObjectReferentTypesLUT())) - print("Variations (train): " + str(env.getVariationsTrain() )) + print("Variations (train): " + str(env.getVariationsTrain())) print("") 
print("----------------------------------------------------------------------------------") print("") - print("Gold Path:" + str(env.getGoldActionSequence())) print("Task Name: " + taskName) print("Variation: " + str(args['var_num']) + " / " + str(env.getMaxVariations(taskName))) - print("Task Description: " + str(env.getTaskDescription()) ) + print("Task Description: " + str(env.getTaskDescription())) # # Main user input loop @@ -97,9 +96,10 @@ def userConsole(args): print("Reward: " + str(reward)) print("Score: " + str(score)) print("isCompleted: " + str(isCompleted)) - #print("info: " + str(info)) + # print("info: " + str(info)) - print("'help' lists valid action templates, 'objects' lists valid objects, 'valid' lists valid action-object combinations (long!). ") + print("'help' lists valid action templates, 'objects' lists valid" + + " objects, 'valid' lists valid action-object combinations (long!). ") print("'goals' lists progress on subgoals.") print("type 'exit' to quit.") @@ -108,7 +108,6 @@ def userConsole(args): # Sanitize input userInputStr = userInputStr.lower().strip() - # Display run history runHistory = env.getRunHistory() print("Run History:") @@ -143,9 +142,9 @@ def build_simplification_str(args): return args["simplifications_preset"] or ",".join(simplifications) -# # Parse command line arguments -# + + def parse_args(): desc = "Play through a game using the console." parser = argparse.ArgumentParser(desc) diff --git a/examples/random_agent.py b/examples/random_agent.py index 7f175189..1bbf38f5 100644 --- a/examples/random_agent.py +++ b/examples/random_agent.py @@ -9,7 +9,6 @@ def randomModel(args): """ Example random agent -- randomly picks an action at each step. 
""" exitCommands = ["quit", "exit"] - taskIdx = args['task_num'] simplificationStr = args['simplification_str'] numEpisodes = args['num_episodes'] @@ -18,14 +17,15 @@ def randomModel(args): finalScores = [] # Initialize environment - env = ScienceWorldEnv("", args['jar_path'], envStepLimit = args['env_step_limit']) + env = ScienceWorldEnv("", args['jar_path'], envStepLimit=args['env_step_limit']) taskNames = env.getTaskNames() print("Task Names: " + str(taskNames)) # Choose task taskName = taskNames[taskIdx] # Just get first task - env.load(taskName, 0, "") # Load the task, so we have access to some extra accessors e.g. getRandomVariationTrain() ) + # Load the task, so we have access to some extra accessors e.g. get_random_variation_train() + env.load(taskName, 0, "") maxVariations = env.getMaxVariations(taskName) print("Starting Task " + str(taskIdx) + ": " + taskName) time.sleep(2) @@ -40,18 +40,17 @@ def randomModel(args): initialObs, initialDict = env.reset() # Example accessors - print("Possible actions: " + str(env.getPossibleActions()) ) - print("Possible objects: " + str(env.getPossibleObjects()) ) + print("Possible actions: " + str(env.getPossibleActions())) + print("Possible objects: " + str(env.getPossibleObjects())) templates, lut = env.getPossibleActionObjectCombinations() print("Possible action/object combinations: " + str(templates)) print("Object IDX to Object Referent LUT: " + str(lut)) print("Task Name: " + taskName) print("Task Variation: " + str(randVariationIdx) + " / " + str(maxVariations)) - print("Task Description: " + str(env.getTaskDescription()) ) - print("look: " + str(env.look()) ) - print("inventory: " + str(env.inventory()) ) - print("taskdescription: " + str(env.taskdescription()) ) - + print("Task Description: " + str(env.getTaskDescription())) + print("look: " + str(env.look())) + print("inventory: " + str(env.inventory())) + print("taskdescription: " + str(env.taskdescription())) score = 0.0 isCompleted = False @@ -59,9 +58,9 @@ 
def randomModel(args): # Run one episode until we reach a stopping condition (including exceeding the maximum steps) userInputStr = "look around" # First action - while (userInputStr not in exitCommands) and (isCompleted == False): + while (userInputStr not in exitCommands) and (isCompleted is False): print("----------------------------------------------------------------") - print ("Step: " + str(curIter)) + print("Step: " + str(curIter)) # Send user input, get response observation, reward, isCompleted, info = env.step(userInputStr) @@ -72,27 +71,27 @@ def randomModel(args): print("Score: " + str(score)) print("isCompleted: " + str(isCompleted)) - # The environment will make isCompleted `True` when a stop condition has happened, or the maximum number of steps is reached. + # The environment will make isCompleted `True` when a stop condition + # has happened, or the maximum number of steps is reached. if (isCompleted): break # Randomly select action - ## Any action (valid or not) - #templates, lut = env.getPossibleActionObjectCombinations() - #print("Possible action/object combinations: " + str(templates)) - #print("Object IDX to Object Referent LUT: " + str(lut)) - #randomTemplate = random.choice( templates ) - #print("Next random action: " + str(randomTemplate)) - #userInputStr = randomTemplate["action"] + # Any action (valid or not) + # templates, lut = env.getPossibleActionObjectCombinations() + # print("Possible action/object combinations: " + str(templates)) + # print("Object IDX to Object Referent LUT: " + str(lut)) + # randomTemplate = random.choice( templates ) + # print("Next random action: " + str(randomTemplate)) + # userInputStr = randomTemplate["action"] - ## Only valid actions + # Only valid actions validActions = env.getValidActionObjectCombinationsWithTemplates() - randomAction = random.choice( validActions ) + randomAction = random.choice(validActions) print("Next random action: " + str(randomAction)) userInputStr = randomAction["action"] - 
print(list(lut.keys())[-1]) # Sanitize input @@ -110,19 +109,20 @@ def randomModel(args): finalScores.append(score) # Report progress of model - print ("Final score: " + str(score)) - print ("isCompleted: " + str(isCompleted)) + print("Final score: " + str(score)) + print("isCompleted: " + str(isCompleted)) # Save history -- and when we reach maxPerFile, export them to file filenameOutPrefix = args['output_path_prefix'] + str(taskIdx) - env.storeRunHistory(episodeIdx, notes = {'text':'my notes here'} ) + env.storeRunHistory(episodeIdx, notes={'text': 'my notes here'}) env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file']) # Episodes are finished -- manually save any last histories still in the buffer env.saveRunHistoriesBufferIfFull(filenameOutPrefix, maxPerFile=args['max_episode_per_file'], forceSave=True) - # Show final episode scores to user: - avg = sum([x for x in finalScores if x >=0]) / len(finalScores) # Clip negative scores to 0 for average calculation + # Show final episode scores to user + # Clip negative scores to 0 for average calculation + avg = sum([x for x in finalScores if x >= 0]) / len(finalScores) print("") print("---------------------------------------------------------------------") print(" Summary (Random Agent)") diff --git a/examples/scienceworld-web-server-example.py b/examples/scienceworld-web-server-example.py index ba39b207..b5258afa 100644 --- a/examples/scienceworld-web-server-example.py +++ b/examples/scienceworld-web-server-example.py @@ -27,22 +27,22 @@ def __init__(self): self.out = "" self.title = "" - def setTitle(self, titleStr:str): + def setTitle(self, titleStr: str): self.title = titleStr - def addHeading(self, strIn:str): + def addHeading(self, strIn: str): self.out += "
\n" + strIn + "\n\n" - def addStr(self, strIn:str): + def addStr(self, strIn: str): self.out += strIn + "\n" def getHTML(self): @@ -55,10 +55,8 @@ def getHTML(self): return out -# -# Save JSON history -# -def saveJSONHistory(history:list): + +def saveJSONHistory(history: list): pathOut = "recordings/" if not os.path.isdir(pathOut): os.mkdir(pathOut) @@ -74,9 +72,10 @@ def saveJSONHistory(history:list): dateTimeObj = datetime.now() timestampStr = dateTimeObj.strftime("timestamp%Y-%M-%d-%H-%M-%S") - filenameOut = pathOut + "recording-" + str(taskName) + "-var" + str(varIdx) + "-" + str(result) + "-" + str(timestampStr) + ".json" + filenameOut = (pathOut + "recording-" + str(taskName) + "-var" + str(varIdx) + + "-" + str(result) + "-" + str(timestampStr) + ".json") - print ("Exporting " + filenameOut) + print("Exporting " + filenameOut) with open(filenameOut, "w") as jsonFile: json.dump(history, jsonFile, indent=4, sort_keys=True) @@ -94,10 +93,10 @@ def app(): pywebio.session.set_env(title='ScienceWorld Demo', auto_scroll_bottom=True) # Initialize environment - env = ScienceWorldEnv("", serverPath=None, envStepLimit = 10_000) + env = ScienceWorldEnv("", serverPath=None, envStepLimit=10_000) pywebio_out.put_markdown('## Science World (Text Simulation)') - #put_button("Click here to export transcript", onclick=lambda: , color='success', outline=True) + # put_button("Click here to export transcript", onclick=lambda: , color='success', outline=True) htmlLog.addHeading("Science World (Text Simulation)") htmlLog.addHorizontalRule() @@ -105,18 +104,18 @@ def app(): taskName = pywebio.input.select("Select a task:", env.getTaskNames()) maxVariations = env.getMaxVariations(taskName) - #variationIdx = slider("Task Variation: ", min_value=0, max_value=(maxVariations-1)) + # variationIdx = slider("Task Variation: ", min_value=0, max_value=(maxVariations-1)) variationIdx = pywebio.input.input('Enter the task variation (min = 0, max = ' + str(maxVariations) + "):") variationIdx = 
int(variationIdx) if variationIdx.isdigit() else 0 # Load environment env.load(taskName, variationIdx, simplificationStr) initialObs, initialDict = env.reset() - #time.sleep(1) + # time.sleep(1) - #print("Possible actions: " + str(env.getPossibleActions()) ) - #print("Possible objects: " + str(env.getPossibleObjects()) ) - #print("Possible action/object combinations: " + str(env.getPossibleActionObjectCombinations())) + # print("Possible actions: " + str(env.getPossibleActions()) ) + # print("Possible objects: " + str(env.getPossibleObjects()) ) + # print("Possible action/object combinations: " + str(env.getPossibleActionObjectCombinations())) pywebio_out.put_table([ ["Task", env.getTaskDescription()], @@ -132,9 +131,9 @@ def app(): userInputStr = "look around" # First action consoleMoveCount = 0 while (userInputStr not in exitCommands): - #put_markdown("### Move " + str(env.getNumMoves()) ) - #htmlLog.addSubheading("Move " + str(env.getNumMoves())) - pywebio_out.put_markdown("### Move " + str(consoleMoveCount) ) + # put_markdown("### Move " + str(env.getNumMoves()) ) + # htmlLog.addSubheading("Move " + str(env.getNumMoves())) + pywebio_out.put_markdown("### Move " + str(consoleMoveCount)) htmlLog.addSubheading("Move " + str(consoleMoveCount)) # Send user input, get response @@ -152,16 +151,18 @@ def app(): # Output (log) htmlLog.addPreformattedText(observation) if (score >= 1.0): - htmlLog.addStr("Task Score: " + str(score) + " (isCompleted: " + str(isCompleted) + ")