from typing import Union

from etc.messages import Message
import etc.gamestate as gs

# Keep the following in short-term memory for each potency option used.
#
# STATE VARIABLES
#   Number of red opinions   [int]
#   Number of blue opinions  [int]
#   Number of red followers  [int]
#   Blue energy              [int - converted from a continuous float to a discrete bin]
#
# PARAMETERS IN CALCULATION
#   Round number
#   Change in red opinion
#   Change in blue opinion
#   Change in red followers
#   Change in blue energy
#
# Used by the heuristic to update state_action_lut at the end of the game.
# For blue:
#   ((change in blue opinions) - (change in red opinions)
#    - (change in red followers) - (change in blue energy)) * (rounds to win)
# Red has the opposite heuristic.

# There, learning. Are ya happy?????


class ActionState:
    # One (state, action, resulting change) record, with the state digitized
    # into coarse bins so it can key a lookup table.
    BINS = 5

    # STATE (digitized from continuous)
    n_blue_opinion_bin: int
    n_red_opinion_bin: int
    n_red_followers_bin: int
    blue_energy_bin: int
    iteration: int

    # ACTION
    action: Message

    # RESULTING STATE
    change_n_blue_opinion: int
    change_n_red_opinion: int
    change_n_red_followers: int
    change_blue_energy: int

    # Assume value lies between 0 and value_range.
    @staticmethod
    def bin(value: Union[float, int], bins: int, value_range: Union[float, int]) -> int:
        clamp_value = max(min(value, value_range), 0)
        return int(clamp_value / (value_range / (bins - 1)))

    def __init__(self, action: Message, start_state: "gs.GameState", next_state: "gs.GameState") -> None:
        start_n_red_opinion, start_n_blue_opinion = start_state.count_majority()
        next_n_red_opinion, next_n_blue_opinion = next_state.count_majority()

        green_population = start_state.n_green_agents
        max_energy = start_state.blue_agent.initial_energy

        # STATE
        self.n_red_opinion_bin = ActionState.bin(start_n_red_opinion, ActionState.BINS, green_population)
        self.n_blue_opinion_bin = ActionState.bin(start_n_blue_opinion, ActionState.BINS, green_population)
        self.n_red_followers_bin = ActionState.bin(start_state.red_agent.red_followers, ActionState.BINS, green_population)
        self.blue_energy_bin = ActionState.bin(start_state.blue_agent.blue_energy, ActionState.BINS, max_energy)

        # ACTION
        self.iteration = start_state.iteration
        self.action = action

        # RESULTING STATE (signed deltas from start_state to next_state)
        self.change_n_blue_opinion = next_n_blue_opinion - start_n_blue_opinion
        self.change_n_red_opinion = next_n_red_opinion - start_n_red_opinion
        self.change_n_red_followers = next_state.red_agent.red_followers - start_state.red_agent.red_followers
        self.change_blue_energy = next_state.blue_agent.blue_energy - start_state.blue_agent.blue_energy

    # Relative to the blue agent - invert for the red agent.
    def rate_state_action(self, round_end: int, lost: bool) -> float:
        local_effect = (self.change_n_blue_opinion + self.change_n_red_opinion
                        + self.change_blue_energy + self.change_n_red_followers)
        return (-1 if lost else 1) * (round_end - self.iteration) * local_effect
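
# --- Illustrative usage sketch (not part of the original module) ---
# A minimal, self-contained check of the two pure calculations above, using
# made-up numbers: a hypothetical green population of 100 agents, the default
# BINS = 5, and hand-filled deltas for the rating. It only exercises bin() and
# rate_state_action(), so no GameState objects are needed.
if __name__ == "__main__":
    # bin(): values are clamped to [0, value_range] and divided into bins of
    # width value_range / (BINS - 1), so only the maximum lands in the top bin.
    assert ActionState.bin(0, ActionState.BINS, 100) == 0      # empty -> lowest bin
    assert ActionState.bin(60, ActionState.BINS, 100) == 2     # 60 / 25 -> bin 2
    assert ActionState.bin(100, ActionState.BINS, 100) == 4    # maximum -> top bin
    assert ActionState.bin(150, ActionState.BINS, 100) == 4    # clamped to the maximum

    # rate_state_action(): build an instance without the GameState constructor
    # and fill in the signed deltas by hand, purely to show the arithmetic.
    sample = ActionState.__new__(ActionState)
    sample.iteration = 5
    sample.change_n_blue_opinion = 3
    sample.change_n_red_opinion = -2
    sample.change_n_red_followers = -1
    sample.change_blue_energy = -4
    # local_effect = 3 + (-2) + (-4) + (-1) = -4; score = 1 * (20 - 5) * -4 = -60
    assert sample.rate_state_action(round_end=20, lost=False) == -60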