Source code for pypownet.agent

__author__ = 'marvinler'
# Copyright (C) 2017-2018 RTE and INRIA (France)
# Authors: Marvin Lerousseau <marvin.lerousseau@gmail.com>
# This file is under the LGPL-v3 license and is part of PyPowNet.
import pypownet.environment
from abc import ABC, abstractmethod


[docs]class Agent(ABC): """ The template to be used to create an agent: any controller of the power grid is expected to be a daughter of this class. """ def __init__(self, environment): """Initialize a new agent.""" assert isinstance(environment, pypownet.environment.RunEnv) self.environment = environment
[docs] @abstractmethod def act(self, observation): """Produces an action given an observation of the environment. Takes as argument an observation of the current state, and returns the chosen action of class Action or np array.""" pass
[docs] def feed_reward(self, action, consequent_observation, rewards_aslist): pass
[docs]class DoNothing(Agent):
[docs] def act(self, observation): action_length = self.environment.action_space.action_length return np.zeros(action_length)
# Examples of baselines agents import numpy as np
[docs]class RandomAction(Agent): """ An example of a baseline controller that produce random actions (ie random line switches and random node switches. """ def __init__(self, environment): super().__init__(environment) self.ioman = ActIOnManager(destination_path='saved_actions_RandomLineSwitch.csv')
[docs] def act(self, observation): action = self.environment.action_space.sample() # # or # action_length = self.environment.action_space.n # action = np.random.choice([0, 1], action_length) return action
[docs]class RandomPointAction(Agent): """ An example of a baseline controller that produce 1 random activation (ie an array with all 0 but one 1). """ def __init__(self, environment): super().__init__(environment) self.ioman = ActIOnManager(destination_path='saved_actions_RandomLineSwitch.csv')
[docs] def act(self, observation): action = self.environment.action_space.get_do_nothing_action() # # or # action_length = self.environment.action_space.n # action = np.zeros(action_length) action[np.random.randint(action.shape[0])] = 1 return action
[docs]class RandomLineSwitch(Agent): """ An example of a baseline controller that randomly switches the status of one random power line per timestep (if the random line is previously online, switch it off, otherwise switch it on). """ def __init__(self, environment): super().__init__(environment) self.ioman = ActIOnManager(destination_path='saved_actions_RandomLineSwitch.csv')
[docs] def act(self, observation): # This agent needs to manipulate actions using grid contextual information, so the observation object needs # to be of class pypownet.environment.Observation: convert from array or raise error if that is not the case if not isinstance(observation, pypownet.environment.Observation): try: observation = self.environment.observation_space.array_to_observation(observation) except Exception as e: raise e # Sanity check: an observation is a structured object defined in the environment file. assert isinstance(observation, pypownet.environment.Observation) action_space = self.environment.action_space # Create template of action with no switch activated (do-nothing action) action = action_space.get_do_nothing_action(as_class_Action=True) action_space.set_lines_status_switch_from_id(action=action, line_id=np.random.randint( action_space.lines_status_subaction_length), new_switch_value=1) # Dump best action into stored actions file self.ioman.dump(action) return action
# No learning (i.e. self.feed_reward does pass)
[docs]class RandomNodeSplitting(Agent): """ Implements a "random node-splitting" agent: at each timestep, this controller will select a random substation (id), then select a random switch configuration such that switched elements of the selected substations change the node within the substation on which they are directly wired. """ def __init__(self, environment): super().__init__(environment) self.ioman = ActIOnManager(destination_path='saved_actions_RandomNodeSplitting.csv')
[docs] def act(self, observation): # This agent needs to manipulate actions using grid contextual information, so the observation object needs # to be of class pypownet.environment.Observation: convert from array or raise error if that is not the case if not isinstance(observation, pypownet.environment.Observation): try: observation = self.environment.observation_space.array_to_observation(observation) except Exception as e: raise e # Sanity check: an observation is a structured object defined in the environment file. assert isinstance(observation, pypownet.environment.Observation) action_space = self.environment.action_space # Create template of action with no switch activated (do-nothing action) action = action_space.get_do_nothing_action(as_class_Action=True) # Select a random substation ID on which to perform node-splitting target_substation_id = np.random.choice(action_space.substations_ids) expected_target_configuration_size = action_space.get_number_elements_of_substation(target_substation_id) # Choses a new switch configuration (binary array) target_configuration = np.random.choice([0, 1], size=(expected_target_configuration_size,)) action_space.set_substation_switches_in_action(action=action, substation_id=target_substation_id, new_values=target_configuration) # Ensure changes have been done on action current_configuration, _ = action_space.get_substation_switches_in_action(action, target_substation_id) assert np.all(current_configuration == target_configuration) # Dump best action into stored actions file self.ioman.dump(action) return action
[docs]class TreeSearchLineServiceStatus(Agent): """ Exhaustive tree search of depth 1 limited to no action + 1 line switch activation """ def __init__(self, environment): super().__init__(environment) self.verbose = True self.ioman = ActIOnManager(destination_path='saved_actions_TreeSearchLineServiceStatus.csv')
[docs] def act(self, observation): # This agent needs to manipulate actions using grid contextual information, so the observation object needs # to be of class pypownet.environment.Observation: convert from array or raise error if that is not the case if not isinstance(observation, pypownet.environment.Observation): try: observation = self.environment.observation_space.array_to_observation(observation) except Exception as e: raise e # Sanity check: an observation is a structured object defined in the environment file. assert isinstance(observation, pypownet.environment.Observation) action_space = self.environment.action_space number_of_lines = self.environment.action_space.lines_status_subaction_length # Simulate the line status switch of every line, independently, and save rewards for each simulation (also store # the actions for best-picking strat) simulated_rewards = [] simulated_actions = [] for l in range(number_of_lines): if self.verbose: print(' Simulating switch activation line %d' % l, end='') # Construct the action where only line status of line l is switched action = action_space.get_do_nothing_action(as_class_Action=True) action_space.set_lines_status_switch_from_id(action=action, line_id=l, new_switch_value=1) simulated_reward = self.environment.simulate(action=action) # Store ROI values simulated_rewards.append(simulated_reward) simulated_actions.append(action) if self.verbose: print('; expected reward %.5f' % simulated_reward) # Also simulate the do nothing action if self.verbose: print(' Simulating do-nothing action', end='') donothing_action = self.environment.action_space.get_do_nothing_action() donothing_simulated_reward = self.environment.simulate(action=donothing_action) simulated_rewards.append(donothing_simulated_reward) simulated_actions.append(donothing_action) # Seek for the action that maximizes the reward best_simulated_reward = np.max(simulated_rewards) best_action = simulated_actions[simulated_rewards.index(best_simulated_reward)] # Dump best action into stored actions file self.ioman.dump(best_action) if self.verbose: if simulated_rewards.index(best_simulated_reward) == len(simulated_rewards)-1: print(' Best simulated action: do-nothing') else: print(' Best simulated action: disconnect line %d; expected reward: %.5f' % ( simulated_rewards.index(best_simulated_reward), best_simulated_reward)) return best_action
[docs]class GreedySearch(Agent): """ This agent is a tree-search model of depth 1, that is constrained to modifiying at most 1 substation configuration or at most 1 line status. This controller used the simulate method of the environment, by testing every 1-line status switch action, every new configuration for substations with at least 4 elements, as well as the do-nothing action. Then, it will seek for the best reward and return the associated action, expecting the maximum reward for the action pool it can reach. Note that the simulate method is only an approximation of the step method of the environment, and in three ways: * simulate uses the DC mode, while step is in AC * simulate uses only the predictions given to the player to simulate the next timestep injections * simulate can not compute the hazards that are supposed to come at the next timestep """ def __init__(self, environment): super().__init__(environment) self.verbose = True self.ioman = ActIOnManager(destination_path='saved_actions.csv')
[docs] def act(self, observation): import itertools # This agent needs to manipulate actions using grid contextual information, so the observation object needs # to be of class pypownet.environment.Observation: convert from array or raise error if that is not the case if not isinstance(observation, pypownet.environment.Observation): try: observation = self.environment.observation_space.array_to_observation(observation) except Exception as e: raise e # Sanity check: an observation is a structured object defined in the environment file. assert isinstance(observation, pypownet.environment.Observation) action_space = self.environment.action_space number_lines = action_space.lines_status_subaction_length # Will store reward, actions, and action name, then eventually pick the maximum reward and retrieve the # associated values rewards, actions, names = [], [], [] # Test doing nothing if self.verbose: print(' Simulation with no action', end='') action = action_space.get_do_nothing_action() _, reward_aslist, _, _ = self.environment.simulate(action, do_sum=False) reward = sum(reward_aslist) if self.verbose: print('; reward: [', ', '.join(['%.2f' % c for c in reward_aslist]), '] =', reward) rewards.append(reward) actions.append(action) names.append('no action') # Test every line opening for l in range(number_lines): if self.verbose: print(' Simulation with switching status of line %d' % l, end='') action = action_space.get_do_nothing_action(as_class_Action=True) action_space.set_lines_status_switch_from_id(action=action, line_id=l, new_switch_value=1) _, reward_aslist, _, _ = self.environment.simulate(action, do_sum=False) reward = sum(reward_aslist) if self.verbose: print('; reward: [', ', '.join(['%.2f' % c for c in reward_aslist]), '] =', reward) rewards.append(reward) actions.append(action) names.append('switching status of line %d' % l) # For every substation with at least 4 elements, try every possible configuration for the switches for substation_id in action_space.substations_ids: substation_n_elements = action_space.get_number_elements_of_substation(substation_id) if 6 > substation_n_elements > 3: # Look through all configurations of n_elements binary vector with first value fixed to 0 for configuration in list(itertools.product([0, 1], repeat=substation_n_elements - 1)): new_configuration = [0] + list(configuration) if self.verbose: print(' Simulation with change in topo of sub. %d with switches %s' % ( substation_id, repr(new_configuration)), end='') # Construct action action = action_space.get_do_nothing_action(as_class_Action=True) action_space.set_substation_switches_in_action(action=action, substation_id=substation_id, new_values=new_configuration) _, reward_aslist, _, _ = self.environment.simulate(action, do_sum=False) reward = sum(reward_aslist) if self.verbose: print('; reward: [', ', '.join(['%.2f' % c for c in reward_aslist]), '] =', reward) rewards.append(reward) actions.append(action) names.append('change in topo of sub. %d with switches %s' % (substation_id, repr(new_configuration))) # Take the best reward, and retrieve the corresponding action best_reward = max(rewards) best_index = rewards.index(best_reward) best_action = actions[best_index] best_action_name = names[best_index] # Dump best action into stored actions file self.ioman.dump(best_action) if self.verbose: print('Action chosen: ', best_action_name, '; expected reward %.4f' % best_reward) return best_action
[docs]class ActionsFileReaderControler(Agent): def __init__(self, environment): super().__init__(environment) # Loads manager + actions ioman = ActIOnManager(delete=False) self.actions = ioman.load('saved_actions.csv') self.action_ctr = 0 self.number_actions = len(self.actions) number_do_nothing = np.sum([np.sum(action) == 0 for action in self.actions]) print('% of do-nothing:', float(number_do_nothing) / float(self.number_actions))
[docs] def act(self, observation): action = self.actions[self.action_ctr] # Correspondance first action to be played = first of list self.action_ctr += 1 return action
############### # Helper agents ############### import os
[docs]class ActIOnManager(object): def __init__(self, destination_path='saved_actions.csv', delete=True): self.actions = [] self.destination_path = destination_path print('Storing actions at', destination_path) # Delete last path with same name by default!!! if delete and os.path.exists(destination_path): os.remove(destination_path)
[docs] def dump(self, action): with open(self.destination_path, 'a') as f: f.write('{}\n'.format(action))
[docs] @staticmethod def load(filepath): with open(filepath, 'r') as f: lines = f.read().splitlines() actions = [[int(l) for l in line.split(',')] for line in lines] assert 0 in np.unique(actions) and 1 in np.unique(actions) and len(np.unique(actions)) == 2 return actions
[docs]class FlowsSaver(Agent): def __init__(self, environment): """Initialize a new agent.""" super().__init__(environment) assert isinstance(environment, pypownet.environment.RunEnv) self.environment = environment self.destination_path = 'saved_flows.csv'
[docs] def act(self, observation): # This agent needs to manipulate actions using grid contextual information, so the observation object needs # to be of class pypownet.environment.Observation: convert from array or raise error if that is not the case if not isinstance(observation, pypownet.environment.Observation): try: observation = self.environment.observation_space.array_to_observation(observation) except Exception as e: raise e open(self.destination_path, 'a').write(','.join(list(map(str, observation.ampere_flows))) + '\n') return self.environment.action_space.get_do_nothing_action()