# Source code for pypownet.runner

__author__ = 'marvinler'
# Copyright (C) 2017-2018 RTE and INRIA (France)
# Authors: Marvin Lerousseau <marvin.lerousseau@gmail.com>
# This file is under the LGPL-v3 license and is part of PyPowNet.
""" This is the machinery that runs your agent in an environment. Note that this is not the machinery that updates the
environment; it is purely concerned with performing policy inference at each timestep given the last observation, and
feeding the reward signal to the appropriate function (feed_reward) of the Agent.

This is not intended to be modified during the practical.
"""
from pypownet.environment import RunEnv
from pypownet.agent import Agent
import logging
import logging.handlers
import csv
import datetime

# File name for the runner's text log. Note that Runner.__init__ spells the same 'runner.log' literal as the default
# of its log_filepath parameter instead of referencing this constant.
LOG_FILENAME = 'runner.log'


class TimestepTimeout(Exception):
    """Exception type signalling that a timestep timed out.

    It carries no extra state beyond what ``Exception`` provides. Nothing in the visible part of this module raises
    it; presumably it is meant for agents exceeding the per-timestep time budget -- confirm against callers.
    """


class Runner(object):
    """Run an agent inside an environment and log what happens at every timestep.

    The runner asks the agent for an action, applies it to the environment, feeds the resulting reward back to the
    agent, and writes both a human-readable log (through the 'pypownet' logger) and, optionally, one
    semicolon-separated CSV row per timestep.
    """

    def __init__(self, environment, agent, render=False, verbose=False, vverbose=False, parameters=None, level=None,
                 max_iter=None, log_filepath='runner.log', machinelog_filepath='machine_logs.csv'):
        """
        Sets up the loggers and stores the environment/agent pair.

        :param environment: RunEnv instance that the agent acts upon
        :param agent: Agent instance that chooses the actions
        :param render: if True, environment.render() is called once here and after every step
        :param verbose: if True, log messages are also printed on the console
        :param vverbose: if True, the console output is enabled and the logger level is lowered to DEBUG
        :param parameters: value written in the 'param_env_name' CSV column (ignored without machine logs)
        :param level: value written in the 'level' CSV column (ignored without machine logs)
        :param max_iter: value written in the 'max_iter' CSV column (ignored without machine logs)
        :param log_filepath: path of the text log file; it is truncated on creation
        :param machinelog_filepath: path of the CSV machine log, or None to disable it
        """
        # Sanity checks: both environment and agent should inherit resp. RunEnv and Agent. Kept as asserts so the
        # exception type seen by existing callers (AssertionError) does not change.
        assert isinstance(environment, RunEnv)
        assert isinstance(agent, Agent)

        # Logger part
        self.logger = logging.getLogger('pypownet')
        if machinelog_filepath is not None:
            # Keep the file handle so it can be flushed and closed (see close()); newline='' lets the csv module
            # control line endings itself, as its documentation requires.
            self._machinelog_file = open(machinelog_filepath, 'w', newline='')
            self.csv_writer = csv.writer(self._machinelog_file, delimiter=';')
            self.csv_writer.writerow(['param_env_name', 'level', 'chronic_name', 'max_iter',
                                      'timestep', 'time', 'game_over', 'timestep_reward_aslist', 'timestep_reward',
                                      'cumulated_reward'])
            self.parameters = parameters
            self.level = level
            self.max_iter = max_iter
        else:
            self._machinelog_file = None
            self.csv_writer = None
            self.parameters, self.level, self.max_iter = None, None, None

        # Always create a log file for runners. The logger level is only changed further down when a verbosity flag
        # is set, so otherwise this handler receives whatever the logger's current level lets through.
        fh = logging.FileHandler(filename=log_filepath, mode='w+')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        self.logger.addHandler(fh)

        if verbose or vverbose:
            # create console handler, set its level, create formatter
            ch = logging.StreamHandler()
            # NOTE(review): the console handler only drops to DEBUG when both flags are set, whereas the logger level
            # below depends on vverbose alone -- confirm this asymmetry is intended.
            ch.setLevel(logging.DEBUG if vverbose and verbose else logging.INFO)
            ch.setFormatter(logging.Formatter('%(levelname)s        %(message)s'))
            self.ch = ch
            # add ch to logger
            self.logger.addHandler(ch)
            self.logger.setLevel(logging.DEBUG if vverbose else logging.INFO)

        self.environment = environment
        self.agent = agent
        self.verbose = verbose
        self.render = render

        # Read once from the environment's game; not used by the other methods of this class.
        self.max_seconds_per_timestep = self.environment.game.get_max_seconds_per_timestep()

        if self.render:
            self.environment.render()

    @staticmethod
    def _info_text(info):
        """Returns info.text when info is truthy, and info itself (e.g. None) otherwise."""
        return info.text if info else info

    def close(self):
        """
        Closes the machine log CSV file, if one was opened. Safe to call several times; once closed,
        dump_machinelogs silently does nothing.
        """
        machinelog_file = getattr(self, '_machinelog_file', None)
        if machinelog_file is not None:
            machinelog_file.close()
            self._machinelog_file = None
            self.csv_writer = None

    def step(self, observation):
        """
        Performs a full RL step: the agent acts given an observation, the environment is updated with that action
        (and resetted through process_game_over if done was returned as True), and the reward is fed to the agent;
        this also logs the variables of the system including actions, observations.

        :param observation: input observation to be given to the agent
        :return: (new observation, action taken, summed reward, reward as a list, done flag)
        """
        self.logger.debug('observation: %s', self.environment.observation_space.array_to_observation(observation))
        action = self.agent.act(observation)

        # Update the environment with the chosen action
        observation, reward_aslist, done, info = self.environment.step(action, do_sum=False)
        if done:
            # info may be falsy here (the other uses of info in this method already allow for it), so never
            # dereference .text directly.
            self.logger.warning('\b\b\bGAME OVER! Resetting grid... (hint: %s)', self._info_text(info))
            observation = self.environment.process_game_over()
        elif info:
            self.logger.warning(info.text)

        reward = sum(reward_aslist)

        if self.render:
            self.environment.render()

        # The agent receives the post-step observation (the reset one after a game over) and the unsummed reward.
        self.agent.feed_reward(action, observation, reward_aslist)

        self.logger.debug('action: %s', action)
        self.logger.debug('reward: [%s]', ','.join(map(str, reward_aslist)))
        self.logger.debug('done: %s', done)
        self.logger.debug('info: %s', self._info_text(info))

        return observation, action, reward, reward_aslist, done

    def loop(self, iterations, epochs=1):
        """
        Runs the simulator for the given number of iterations time the number of episodes.

        :param iterations: int of number of iterations per episode
        :param epochs: int of number of episodes, each resetting the environment at the beginning
        :return: the reward cumulated over every timestep of every episode
        """
        cumul_rew = 0.0
        for _ in range(epochs):
            # clean restart of environment at the beginning of each epoch
            self.logger.warning('Resetting environment...')
            observation = self.environment.reset()
            for i_iter in range(1, iterations + 1):
                observation, _action, reward, reward_aslist, done = self.step(observation)
                cumul_rew += reward

                # save some info in txt and csv loggers
                self.logger.info("step %d/%d - reward: %.2f; cumulative reward: %.2f",
                                 i_iter, iterations, reward, cumul_rew)
                self.dump_machinelogs(i_iter, done, reward, reward_aslist, cumul_rew,
                                      self.environment.get_current_datetime())

        return cumul_rew

    def dump_machinelogs(self, timestep_id, done, reward, reward_aslist, cumul_rew, datetime):
        """
        Appends one row describing the given timestep to the machine log CSV; does nothing when machine logs are
        disabled.

        :param timestep_id: index of the timestep within its episode
        :param done: game over flag of the timestep
        :param reward: summed reward of the timestep
        :param reward_aslist: unsummed reward of the timestep
        :param cumul_rew: reward cumulated so far
        :param datetime: object with a strftime method giving the simulated time of the timestep; the name shadows
            the datetime module inside this method and is kept because it is part of the public signature
        """
        if self.csv_writer is None:
            return

        # Column order must match the header row written in __init__.
        self.csv_writer.writerow([self.parameters, self.level, self.environment.get_current_chronic_name(),
                                  self.max_iter, timestep_id, datetime.strftime("%Y-%m-%d %H:%M"), done,
                                  reward_aslist, reward, cumul_rew])

        # Push the row to disk right away so the machine log stays usable if the run is interrupted.
        machinelog_file = getattr(self, '_machinelog_file', None)
        if machinelog_file is not None:
            machinelog_file.flush()