Skip to content
Snippets Groups Projects
Commit 75a78ab0 authored by Tessaris Sergio's avatar Tessaris Sergio
Browse files

feat: cli and player game termination

parent f78bb455
No related branches found
No related tags found
No related merge requests found
......@@ -5,12 +5,14 @@ Command line interface
"""
import argparse
import io
import json
import os
import sys
from . import __version__
from .gridworld import GridWorld
from .runner import get_subclasses, check_entrypoint, get_player_class, get_world_class, play_episode, worlds
from .runner import get_subclasses, check_entrypoint, get_player_class, get_world_class, run_episode, worlds
def gridrunner(*args):
......@@ -30,6 +32,7 @@ def gridrunner(*args):
parser.add_argument('--noshow', action='store_false', help="prevent the printing the world at each step")
parser.add_argument('--out', '-o', type=argparse.FileType('w'), default=sys.stdout, help="write output to file")
parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
parser.add_argument('--log', '-l', type=argparse.FileType('w'), help="write the log of the games to file (JSON)")
args_dict = vars(parser.parse_args(args))
name = args_dict['name']
......@@ -38,7 +41,8 @@ def gridrunner(*args):
world_type = args_dict['world']
horizon = args_dict['horizon']
show = args_dict['noshow']
outf = args_dict['out']
outf: io.TextIOBase = args_dict['out']
game_log: io.TextIOBase = args_dict['log']
player_class = get_player_class(obj_ref, path=path)
world_class = get_world_class(world_type)
......@@ -46,16 +50,33 @@ def gridrunner(*args):
if name is None:
name = player_class.__name__
player = player_class(name=name)
if game_log is not None:
print('[', file=game_log)
if len(args_dict['infiles']) > 0:
morelogs = False
for world in worlds(args_dict['infiles'], world_class):
play_episode(world, player_class, player_name=name, horizon=horizon, show=show, outf=outf)
glog = run_episode(world, player, horizon=horizon, show=show, outf=outf)
if game_log is not None:
if morelogs:
print(',', file=game_log)
else:
morelogs = True
json.dump(glog, game_log)
else:
world = world_class.random()
# show world definition
print('-' * 10 + ' Playing on world: ' + '-' * 10, file=outf)
world.to_JSON(outf)
print('\n' + '-' * 40, file=outf)
play_episode(world, player_class, player_name=name, horizon=horizon, show=show, outf=outf)
glog = run_episode(world, player, horizon=horizon, show=show, outf=outf)
if game_log is not None:
json.dump(glog, game_log)
if game_log is not None:
print(']', file=game_log)
return 0
......
......@@ -132,6 +132,11 @@ class Agent(WorldObject):
"""Execute an action and return the reward of the action."""
raise NotImplementedError
def suicide(self) -> int:
    """Terminate the agent on request and return the outcome of doing so.

    This base implementation does not know how to die: it changes no state
    and always reports an outcome of 0.
    """
    outcome = 0
    return outcome
def on_done(self):
    """Hook called when the episode terminates; does nothing by default."""
......@@ -426,10 +431,16 @@ class Eater(Agent):
self._foodcount = 0
self._reward = 0
self.FOOD_BONUS = 10
self._alive = True
def charSymbol(self):
    """Return the symbol character of this agent (the monkey emoji)."""
    symbol = '🐒'
    return symbol
@property
def isAlive(self):
    """Return True if the agent can still execute actions."""
    still_alive = self._alive
    return still_alive
@property
def reward(self) -> int:
"""The current accumulated reward"""
......@@ -454,6 +465,12 @@ class Eater(Agent):
self._reward += cost
return cost
def suicide(self) -> int:
    """Mark the agent as no longer alive and return the outcome of the action.

    Dying on request carries no penalty here: the accumulated reward is not
    touched and the reported outcome is always 0.
    """
    outcome = 0  # suicide is free for this agent
    self._alive = False
    return outcome
def percept(self) -> 'Eater.Percept':
return self.Percept(
position=self.location,
......
......@@ -8,10 +8,11 @@ import argparse
import copy
import importlib
import io
import json
import os
import re
import sys
from typing import Iterator, Type
from typing import Any, Dict, Iterator, Type
from .gridworld import Agent, GridWorld, GridWorldException, EaterWorld
from .player import Player, InformedPlayer
......@@ -85,14 +86,22 @@ def get_world_class(name: str) -> Type[GridWorld]:
def worlds(files, world_class: 'Type[GridWorld]') -> 'Iterator[GridWorld]':
    """Yield the worlds described by a sequence of JSON streams.

    Each stream must contain either a single world definition (a JSON object)
    or a list of world definitions (a JSON array). Every definition is turned
    into a world with ``world_class.from_dict``. Loading is best-effort: a
    stream or a definition that cannot be processed is reported on standard
    output and skipped, so one bad input does not stop the remaining ones.

    Args:
        files: iterable of readable streams accepted by ``json.load``.
        world_class: class providing the ``from_dict`` constructor.

    Yields:
        The worlds built from the definitions, in input order.
    """
    for fd in files:
        # Broad catch on purpose: unreadable or malformed input is skipped,
        # whatever the underlying error is.
        try:
            world_defs = json.load(fd)
        except Exception as e:
            print('Skipping {}: {}'.format(fd, e))
            continue

        if isinstance(world_defs, dict):
            # Only the construction is guarded; the yield stays outside the
            # try so the handler never sees anything but a from_dict failure.
            try:
                world = world_class.from_dict(world_defs)
            except Exception as e:
                print('Skipping {}: {}'.format(fd, e))
                continue
            yield world
        elif isinstance(world_defs, list):
            for wd in world_defs:
                try:
                    world = world_class.from_dict(wd)
                except Exception as e:
                    # A broken definition must not discard its siblings.
                    print('Skipping world {}: {}'.format(wd, e))
                    continue
                yield world
        else:
            # Valid JSON, but not a world definition nor a list of them:
            # report it instead of dropping the stream without a trace.
            print('Skipping {}: expected a JSON object or array, got {}'.format(
                fd, type(world_defs).__name__))
def run_episode(world: GridWorld, player: Player, agent: Agent = None, horizon: int = 0, show=True, outf: io.TextIOBase = None):
def run_episode(world: GridWorld, player: Player, agent: Agent = None, horizon: int = 0, show=True, outf: io.TextIOBase = None) -> Dict[str, Any]:
"""Run an episode on the world using the player to control the agent. The horizon specifies the maximum number of steps, 0 or None means no limit. If show is true then the world is printed ad each iteration before the player's turn.
Raise the exception GridWorldException if the agent is not in the world.
......@@ -105,8 +114,11 @@ def run_episode(world: GridWorld, player: Player, agent: Agent = None, horizon:
show (bool, optional): whether to show the environment before a step. Defaults to True.
outf (TextIOBase, optional): writes output to the given stream. Defaults to stdout.
Returns:
dictionary (JSON encodable) with the log of the game
Raises:
GridWorldException: [description]
GridWorldException: if there are problems with the world (e.g. there's no agent)
"""
if outf is None:
outf = sys.stdout
......@@ -117,11 +129,23 @@ def run_episode(world: GridWorld, player: Player, agent: Agent = None, horizon:
raise GridWorldException(f'No agent in this {world}')
elif agent not in world.objects:
raise GridWorldException('Missing agent {}, cannot run the episode'.format(agent))
game_log = {
'world': world.to_dict(),
'agent': agent.name,
'player': player.name,
'actions': [],
'exceptions': [],
'maxsteps': False
}
# inform the player of the start of the episode
if isinstance(player, InformedPlayer):
player.start_episode(copy.deepcopy(world))
else:
player.start_episode()
try:
if isinstance(player, InformedPlayer):
player.start_episode(copy.deepcopy(world))
else:
player.start_episode()
except Exception as e:
print(f'Exception in Player.start_episode: {e}', file=outf)
game_log['exceptions'].append(f'Player.start_episode: {e}')
step = 0
while not horizon or step < horizon:
if agent.success():
......@@ -133,23 +157,42 @@ def run_episode(world: GridWorld, player: Player, agent: Agent = None, horizon:
if show:
print(world, file=outf)
action = player.play(step, agent.percept(), agent.actions())
try:
action = player.play(step, agent.percept(), agent.actions())
except Exception as e:
action = None
print(f'Exception in Player.play: {e}', file=outf)
game_log['exceptions'].append(f'Player.play: {e}')
if action is None:
game_log['actions'].append(action)
agent.suicide()
print('Episode terminated by the player {}.'.format(player.name), file=outf)
break
game_log['actions'].append(action.name)
reward = agent.do(action)
print('Step {}: agent {} executing {} -> reward {}'.format(step, agent.name, action.name, reward), file=outf)
player.feedback(action, reward, agent.percept())
try:
player.feedback(action, reward, agent.percept())
except Exception as e:
print(f'Exception in Player.feedback: {e}', file=outf)
game_log['exceptions'].append(f'Player.feedback: {e}')
step += 1
else:
print('Episode terminated by maximum number of steps ({}).'.format(horizon), file=outf)
game_log['maxsteps'] = True
player.end_episode(agent.reward, agent.isAlive, agent.success)
try:
player.end_episode(agent.reward, agent.isAlive, agent.success())
except Exception as e:
print(f'Exception in Player.end_episode: {e}', file=outf)
game_log['exceptions'].append(f'Player.end_episode: {e}')
game_log['reward'] = agent.reward
game_log['alive'] = agent.isAlive
game_log['success'] = agent.success()
print(world, file=outf)
print('Episode terminated with a reward of {} for agent {}'.format(agent.reward, agent.name), file=outf)
def play_episode(world: GridWorld, player_class: Type[Player], player_name: str = None, horizon: int = 100, show: bool = True, outf: io.TextIOBase = None):
    """Instantiate a player and run a single episode with it on the world.

    Args:
        world: the world the episode is played on.
        player_class: class of the player; it is instantiated with the
            keyword argument ``name=player_name``.
        player_name: name handed to the new player.
        horizon: forwarded to run_episode as the step limit.
        show: forwarded to run_episode.
        outf: forwarded to run_episode as the output stream.
    """
    new_player = player_class(name=player_name)
    run_episode(world, new_player, horizon=horizon, show=show, outf=outf)
return game_log
......@@ -229,6 +229,12 @@ class Hunter(Agent):
self._reward += reward
return reward
def suicide(self) -> int:
    """Mark the agent as no longer alive and return the outcome of the action.

    Dying on request is penalised: -1000 is added to the accumulated reward
    and the same value is returned to the caller.
    """
    self._alive = False
    penalty = -1000
    self._reward += penalty
    return penalty
class WumpusWorld(GridWorld):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment