#!/usr/bin/python3
"""Evolve strategies for the Iterated Prisoner's Dilemma with a genetic algorithm.

A strategy is a list of three bits:
    [first move, reaction to defection, reaction to co-operation]
where 0 is defection and 1 is co-operation.
"""
import argparse
import copy
import random

Strategy = list[int]

# I don't actually know the names for all of these strategies so I'm going to
# make some up.  The order here is also the column order of the TSV output.
strategies = [
    [0, 0, 0],  # Always defect.
    [0, 0, 1],  # Grim tit-for-tat.
    [0, 1, 0],  # Defect at first, then do opposite of what opponent did last.
    [0, 1, 1],  # Defect at first, then always co-operate.
    [1, 0, 0],  # Feint co-operation, then always defect.
    [1, 0, 1],  # Tit-for-tat.
    [1, 1, 0],  # Co-operate at first, then do opposite of what opponent did last.
    [1, 1, 1],  # Always co-operate.
]

# Opponents every candidate is scored against.  Only a subset of `strategies`
# is used; add further entries here to change the fitness landscape.
FIXED_STRATEGIES = [
    [0, 0, 0],  # Always defect.
    [1, 0, 1],  # Tit-for-tat.
    [1, 1, 1],  # Always co-operate.
]

# Payoff awarded to the agent, keyed by (agent move, opponent move).
PAYOFFS = {
    (0, 0): 1,
    (0, 1): 5,
    (1, 0): 0,
    (1, 1): 3,
}


def initialise_population(size: int) -> list[Strategy]:
    """
    Initialises a population of strategies for the Iterated Prisoner's Dilemma.

    Args:
        size (int): The size of the population

    Returns:
        population (list): A list of strategies
    """
    # Since there are only 8 possible strategies, just over-sample the space
    # at random.  Copies are returned so mutation never touches `strategies`.
    return [copy.deepcopy(strategy) for strategy in random.choices(strategies, k=size)]


def _perceived(move: int, noise_level: float) -> int:
    """Return `move` as seen by the other player: flipped with probability `noise_level`."""
    # Exactly one random draw per call, whatever the noise level, so seeded
    # runs consume the generator in a fixed order.  Using `<` guarantees that
    # noise_level == 0 never flips and noise_level == 1 always does.
    return 1 - move if random.random() < noise_level else move


def _play_match(agent: Strategy, opponent: Strategy, num_iterations: int, noise_level: float) -> int:
    """Play one iterated game and return the total payoff earned by `agent`."""
    score = 0
    # What each side *believes* the other played last round (possibly noisy).
    agent_move_seen = None
    opponent_move_seen = None
    for iteration in range(num_iterations):
        if iteration == 0:
            agent_move, opponent_move = agent[0], opponent[0]
        else:
            # React to co-operation (index 2) if the other side appeared to
            # co-operate last round, otherwise react to defection (index 1).
            agent_move = agent[2] if opponent_move_seen else agent[1]
            opponent_move = opponent[2] if agent_move_seen else opponent[1]
        agent_move_seen = _perceived(agent_move, noise_level)
        opponent_move_seen = _perceived(opponent_move, noise_level)
        # Payoff is based on the moves actually played, not the noisy view.
        score += PAYOFFS[agent_move, opponent_move]
    return score


def fitness(agent: Strategy, num_iterations: int, noise_level: float) -> int:
    """
    Play the Iterated Prisoner's Dilemma against each fixed strategy and return the total score.

    Args:
        agent (list): the strategy being evaluated.
        num_iterations (int): the number of rounds played against each fixed strategy.
        noise_level (float): the probability that a player's last move will be misrepresented to the other player

    Returns:
        fitness (int): the score obtained by the agent, summed over all fixed strategies.
    """
    return sum(
        _play_match(agent, opponent, num_iterations, noise_level)
        for opponent in FIXED_STRATEGIES
    )


def list_fitnesses(population: list[Strategy], num_iterations: int, noise_level: float) -> list[int]:
    """
    Calculate the fitness of each agent in a population.

    Args:
        population (list): the population of strategies.
        num_iterations (int): the number of iterations to play.
        noise_level (float): the probability that a player's last move will be misrepresented to the other player

    Returns:
        fitnesses (list): the fitness of each agent, in population order.
    """
    return [fitness(agent, num_iterations, noise_level) for agent in population]


def get_best(population: list[Strategy], fitnesses: list[int], generation: int) -> dict:
    """
    Get the best agent in a population, given a list of fitnesses.

    Args:
        population (list): the population of strategies.
        fitnesses (list): the fitness of each agent.
        generation (int): the generation number to record alongside the agent.

    Returns:
        best_agent (dict): the keys "strategy" (a copy of the best strategy), "fitness" and "generation".

    Raises:
        ValueError: if `fitnesses` is empty.
    """
    # On ties the earliest agent in the population wins.
    best_index = fitnesses.index(max(fitnesses))
    return {
        "strategy": list(population[best_index]),
        "fitness": fitnesses[best_index],
        "generation": generation,
    }


def tournament_selection(population, fitnesses, num_survivors, tournament_size=3):
    """
    Select agents from a population based on their fitness.

    Args:
        population (list): the population of strategies.
        fitnesses (list): the fitness of each agent.
        num_survivors (int): the number of agents to select.
        tournament_size (int): how many agents compete in each tournament; clamped to the population size.

    Returns:
        survivors (list): copies of the selected agents.

    Raises:
        ValueError: if survivors are requested from an empty population.
    """
    if num_survivors <= 0:
        return []
    contestants = list(zip(population, fitnesses))
    if not contestants:
        raise ValueError("cannot select survivors from an empty population")
    # random.sample() refuses to draw more items than exist, so shrink the
    # tournament for tiny populations instead of crashing.
    entrants = max(1, min(tournament_size, len(contestants)))

    survivors = []
    for _ in range(num_survivors):
        tournament = random.sample(contestants, entrants)
        winner = max(tournament, key=lambda entry: entry[1])
        # Deep copy to prevent unintended modifications of the old population.
        survivors.append(copy.deepcopy(winner[0]))
    return survivors


def crossover(parents, crossover_rate, num_offspring):
    """
    Perform single-point crossover on selected parents.

    With probability `crossover_rate` two distinct parents are recombined into
    two children; otherwise a single parent is cloned.  If fewer than two
    parents are available, every offspring is a clone.

    Args:
        parents (list): List of selected strategies.
        crossover_rate (float): Probability of crossover occurring.
        num_offspring (int): Number of offspring to generate.

    Returns:
        offspring (list): List of new strategies.

    Raises:
        ValueError: if offspring are requested but `parents` is empty.
    """
    if num_offspring <= 0:
        return []
    if not parents:
        raise ValueError("crossover requires at least one parent")
    can_recombine = len(parents) >= 2

    offspring = []
    while len(offspring) < num_offspring:
        if random.random() < crossover_rate and can_recombine:
            p1, p2 = random.sample(parents, 2)
            crossover_point = random.randint(1, 2)
            # Slicing and concatenating already builds brand-new lists.
            offspring.append(p1[:crossover_point] + p2[crossover_point:])
            offspring.append(p2[:crossover_point] + p1[crossover_point:])
        else:
            offspring.append(copy.deepcopy(random.choice(parents)))
    # A crossover adds two children at once, so trim any overshoot.
    return offspring[:num_offspring]


def mutate(offspring, mutation_rate):
    """
    Perform bit-flip mutation on offspring.

    Args:
        offspring (list): List of offspring strategies; left unmodified.
        mutation_rate (float): Probability of mutation occurring per individual.

    Returns:
        mutated_offspring (list): List of mutated strategies.
    """
    # Deep copy to prevent modifying the original offspring.
    mutated_offspring = copy.deepcopy(offspring)
    for strategy in mutated_offspring:
        if random.random() < mutation_rate:
            mutation_point = random.randint(0, 2)
            strategy[mutation_point] = 1 - strategy[mutation_point]
    return mutated_offspring


def _strategy_code(strategy):
    """Render a strategy as a compact bit string, e.g. [1, 0, 1] -> "101"."""
    return "".join(map(str, strategy))


def _format_row(generation, best, fitnesses, population):
    """Build one TSV result row: best-so-far, average fitness, count of each strategy."""
    average_fitness = sum(fitnesses) / len(fitnesses)
    counts = "\t".join(str(population.count(strategy)) for strategy in strategies)
    return (
        f"{generation}\t{best['fitness']}\t{_strategy_code(best['strategy'])}"
        f"\t{average_fitness}\t{counts}"
    )


def evolve(size, num_generations, give_up_after, num_iterations, selection_proportion, crossover_rate, mutation_rate, noise_level):
    """
    Evolves strategies over a number of generations for the Iterated Prisoner's Dilemma.

    Args:
        size (int): Initial population size
        num_generations (int): Number of generations
        give_up_after (int): Number of generations to give up after if best solution has remained unchanged
        num_iterations (int): Number of iterations of the dilemma between two agents
        selection_proportion (float): The proportion of the population to be selected (survive) on each generation
        crossover_rate (float): Probability of a selected pair of solutions to sexually reproduce
        mutation_rate (float): Probability of a selected offspring to undergo mutation
        noise_level (float): The probability that a player's last move will be misrepresented to the other player

    Returns:
        results (list): The results of the evolution, one TSV-formatted string per row (header first)

    Raises:
        ValueError: if `size` is less than 1.
    """
    if size < 1:
        raise ValueError("size must be at least 1")

    population = initialise_population(size)
    fitnesses = list_fitnesses(population, num_iterations, noise_level)
    current_best = get_best(population, fitnesses, 0)

    header = "Generation\tBestFitness\tBestStrategy\tAvgFitness\t" + "\t".join(
        _strategy_code(strategy) for strategy in strategies
    )
    results = [header, _format_row(0, current_best, fitnesses, population)]

    for generation in range(1, num_generations):
        # Keep at least one survivor (so there is something to breed from) and
        # never more than the current population (so it cannot grow).
        num_survivors = int(len(population) * selection_proportion)
        num_survivors = max(1, min(len(population), num_survivors))

        population = tournament_selection(population, fitnesses, num_survivors)
        offspring = crossover(population, crossover_rate, size - len(population))
        population += mutate(offspring, mutation_rate)

        fitnesses = list_fitnesses(population, num_iterations, noise_level)
        generation_best = get_best(population, fitnesses, generation)
        if generation_best['fitness'] > current_best['fitness']:
            current_best = generation_best
            print(f"New best strategy: {current_best['strategy']}, {current_best['fitness']}")

        results.append(_format_row(generation, current_best, fitnesses, population))

        # Stop early once the best-so-far has gone unimproved for long enough.
        if generation - current_best['generation'] >= give_up_after:
            break

    print(f"Best strategy: {current_best['strategy']}")
    print(f"Fitness: {current_best['fitness']}")
    print(f"Generation: {current_best['generation']}")

    return results


def main():
    """Parse command-line options, run the evolution and write the TSV results."""
    parser = argparse.ArgumentParser(description="Program to evolve strategies for the Iterated Prisoner's Dilemma")
    parser.add_argument("-s", "--size", type=int, help="Initial population size", default=75)
    parser.add_argument("-g", "--num-generations", type=int, help="Number of generations", default=500)
    parser.add_argument("-a", "--give-up-after", type=int, help="Number of generations to give up after if best solution has remained unchanged", default=50)
    parser.add_argument("-i", "--num-iterations", type=int, help="Number of iterations of the dilemma between two agents", default=10)
    parser.add_argument("-p", "--selection-proportion", type=float, help="The proportion of the population to be selected (survive) on each generation", default=0.2)
    parser.add_argument("-c", "--crossover-rate", type=float, help="Probability of a selected pair of solutions to sexually reproduce", default=0.8)
    parser.add_argument("-m", "--mutation-rate", type=float, help="Probability of a selected offspring to undergo mutation", default=0.1)
    parser.add_argument("-o", "--output-file", type=str, help="File to write TSV results to", default="output.tsv")
    parser.add_argument("-n", "--noise-level", type=float, help="The probability that the opponent's last move will be misrepresented to the agent", default=0)
    args = parser.parse_args()

    results = evolve(
        args.size,
        args.num_generations,
        args.give_up_after,
        args.num_iterations,
        args.selection_proportion,
        args.crossover_rate,
        args.mutation_rate,
        args.noise_level,
    )

    # Reference table: how each of the eight strategies scores on its own.
    for strategy in strategies:
        print(f"{strategy}: {fitness(strategy, args.num_iterations, args.noise_level)}")

    if args.output_file:
        with open(args.output_file, "w", encoding="utf-8") as f:
            f.writelines(f"{row}\n" for row in results)


if __name__ == "__main__":
    main()