Source code for pypownet.runner

__author__ = 'marvinler'
# Copyright (C) 2017-2018 RTE and INRIA (France)
# Authors: Marvin Lerousseau <marvin.lerousseau@gmail.com>
# This file is under the LGPL-v3 license and is part of PyPowNet.
""" This is the machinnery that runs your agent in an environment. Note that this is not the machinnery of the update of the
environment; it is purely related to perform policy inference at each timestep given the last observation, and feeding
the reward signal to the appropriate function (feed_reward) of the Agent.

This is not intented to be modified during the practical.
"""
from pypownet.environment import RunEnv
from pypownet.agent import Agent
import logging
import logging.handlers
import csv
import datetime

LOG_FILENAME = 'runner.log'


class TimestepTimeout(Exception):
    pass


class Runner(object):
    def __init__(self, environment, agent, render=False, verbose=False, vverbose=False, parameters=None, level=None,
                 max_iter=None, log_filepath='runner.log', machinelog_filepath='machine_logs.csv'):
        # Sanity checks: both environment and agent should inherit resp. RunEnv and Agent
        assert isinstance(environment, RunEnv)
        assert isinstance(agent, Agent)

        # Logger part
        self.logger = logging.getLogger('pypownet')
        if machinelog_filepath is not None:
            self.csv_writer = csv.writer(open(machinelog_filepath, 'w'), delimiter=';')
            self.csv_writer.writerow(['param_env_name', 'level', 'chronic_name', 'max_iter', 'timestep', 'time',
                                      'game_over', 'timestep_reward_aslist', 'timestep_reward', 'cumulated_reward'])
            self.parameters = parameters
            self.level = level
            self.max_iter = max_iter
        else:
            self.csv_writer = None
            self.parameters, self.level, self.max_iter = None, None, None

        # Always create a log file for runners
        fh = logging.FileHandler(filename=log_filepath, mode='w+')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        self.logger.addHandler(fh)

        if verbose or vverbose:
            # create console handler, set level to debug, create formatter
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG if vverbose and verbose else logging.INFO)
            ch.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
            self.ch = ch
            # add ch to logger
            self.logger.addHandler(ch)
            self.logger.setLevel(logging.DEBUG if vverbose else logging.INFO)

        self.environment = environment
        self.agent = agent
        self.verbose = verbose
        self.render = render

        self.max_seconds_per_timestep = self.environment.game.get_max_seconds_per_timestep()

        if self.render:
            self.environment.render()

    def step(self, observation):
        """ Performs a full RL step: the agent acts given an observation, receives and processes the reward, and the
        environment is reset if done was returned as True; this also logs the variables of the system including
        actions and observations.

        :param observation: input observation to be given to the agent
        :return: (new observation, action taken, reward received)
        """
        self.logger.debug('observation: ' +
                          str(self.environment.observation_space.array_to_observation(observation)))
        action = self.agent.act(observation)

        # Update the environment with the chosen action
        observation, reward_aslist, done, info = self.environment.step(action, do_sum=False)
        if done:
            self.logger.warning('\b\b\bGAME OVER! Resetting grid... (hint: %s)' % info.text)
            observation = self.environment.process_game_over()
        elif info:
            self.logger.warning(info.text)
        reward = sum(reward_aslist)

        if self.render:
            self.environment.render()

        self.agent.feed_reward(action, observation, reward_aslist)

        self.logger.debug('action: {}'.format(action))
        self.logger.debug('reward: {}'.format('[' + ','.join(list(map(str, reward_aslist))) + ']'))
        self.logger.debug('done: {}'.format(done))
        self.logger.debug('info: {}'.format(info if not info else info.text))

        return observation, action, reward, reward_aslist, done

    def loop(self, iterations, epochs=1):
        """ Runs the simulator for the given number of iterations times the number of episodes.

        :param iterations: int of number of iterations per episode
        :param epochs: int of number of episodes, each resetting the environment at the beginning
        :return: the cumulated reward obtained over all episodes
        """
        cumul_rew = 0.0
        for i_episode in range(epochs):
            # clean restart of environment at the beginning of each epoch
            self.logger.warning('Resetting environment...')
            observation = self.environment.reset()
            for i_iter in range(1, iterations + 1):
                (observation, action, reward, reward_aslist, done) = self.step(observation)
                cumul_rew += reward

                # save some info in txt and csv loggers
                self.logger.info("step %d/%d - reward: %.2f; cumulative reward: %.2f" %
                                 (i_iter, iterations, reward, cumul_rew))
                self.dump_machinelogs(i_iter, done, reward, reward_aslist, cumul_rew,
                                      self.environment.get_current_datetime())

        return cumul_rew

    def dump_machinelogs(self, timestep_id, done, reward, reward_aslist, cumul_rew, datetime):
        """ Writes one row of per-timestep information (rewards, game-over flag, chronic name and timestamp) to the
        machine-readable CSV log, if one was configured. """
        if self.csv_writer is None:
            return

        param_env_name = self.parameters
        level = self.level
        chronic_name = self.environment.get_current_chronic_name()
        max_iter = self.max_iter
        timestep = timestep_id
        time = datetime.strftime("%Y-%m-%d %H:%M")
        game_over = done
        timestep_reward_aslist = reward_aslist
        timestep_reward = reward
        cumulated_reward = cumul_rew

        self.csv_writer.writerow([param_env_name, level, chronic_name, max_iter, timestep, time, game_over,
                                  timestep_reward_aslist, timestep_reward, cumulated_reward])
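

# Illustrative usage sketch: assuming `env` is an already-constructed pypownet.environment.RunEnv
# and `agent` an instance of pypownet.agent.Agent (both built elsewhere, e.g. by the pypownet
# command-line entry point), a Runner can be driven for a fixed number of timesteps like this:
#
#     runner = Runner(env, agent, render=False, verbose=True)
#     final_cumulated_reward = runner.loop(iterations=100, epochs=1)
#
# loop() returns the cumulated reward over all episodes; per-timestep details are written to the
# text log (log_filepath) and, when machinelog_filepath is not None, to the CSV machine log.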