Snippet: game_solver.py

Implementación de Teoría de juegos que estoy preparando para un paper

#!/usr/bin/env python
#-*-coding:utf-8-*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

#===============================================================================
# DOC
#===============================================================================

"""This code is a simple implementation of an algoritm published in the paper

    "Aplicación de Teoría de Juegos a una Inteligencia Artificial de un Browser
     Game Persistente de Estrategia" (JAIIO 40, 2011, http://www.40jaiio.org.ar)

    (EN: Applying Game Theory to an Artificial Intelligence Persistent Browser
     Strategy Game)

"""

#===============================================================================
# META
#===============================================================================

__version__ = "0.1"
__license__ = "GPL3"
__author__ = "JBC"
__mail__ = "jbc dot develop at gmail dot com"

#===============================================================================
# IMPORTS
#===============================================================================

import operator
import random

#===============================================================================
# FUNCTIONS
#===============================================================================

def liar_solve(mtx):
    """Returns the probability of victory of all strategies with respect to the
    strategies that were not used.

    @param mtx: a matrix where each row represents the strategies of player 1,
    each column the strategies of player 2, and each cell has the advantage that
    the strategies of the row over the column (negative values ​​imply advantages
    in the column on the rows and ​​None values siginifca that these strategies
    never encountered)

    @return A tuple with 2 list, the first one with the probabilities of the
    row strategies, and te second one with the probablities of the colum

    """
    to_percent = lambda values: [(v * 100.0) / sum(values) for v in values]
    row_nones = [l.count(None) for l in mtx]
    col_nones = 1
    return to_percent(row_nones), to_percent(col_nones)

def game_theory_solve(mtx):
    """ Approximate the strategy oddments for 2
    person zero-sum games of perfect information.

    Applies the iterative solution method described
    by J.D. Williams in his classic
    book, The Compleat Strategyst, ISBN 0-486-25101-2.
    See chapter 5, page 180 for details.

    @param mtx: a matrix where each row represents the strategies of player 1,
    each column the strategies of player 2, and each cell has the advantage that
    the strategies of the row over the column (negative values ​​imply advantages
    in the column on the rows)

    @return A tuple with 2 list, the first one with the probabilities of the
    row strategies, and te second one with the probablities of the colum

    Based on the work of:
        @author: Raymond Hettinger
        @contact: http://code.activestate.com/recipes/496825/

    """
    iterations = 100
    transpose = zip(*mtx)
    numrows = len(mtx)
    numcols = len(transpose)
    row_cum_payoff = [0] * numrows
    col_cum_payoff = [0] * numcols
    colpos = range(numcols)
    rowpos = map(operator.neg, xrange(numrows))
    colcnt = [0] * numcols
    rowcnt = [0] * numrows
    active = 0
    for i in xrange(iterations):
        rowcnt[active] += 1
        col_cum_payoff = map(operator.add, mtx[active], col_cum_payoff)
        active = min(zip(col_cum_payoff, colpos))[1]
        colcnt[active] += 1
        row_cum_payoff = map(operator.add, transpose[active], row_cum_payoff)
        active = -max(zip(row_cum_payoff, rowpos))[1]
    return rowcnt, colcnt

def best_strategy(mtx, plyr):
    """Returns the suggested strategy for a given player. If all of the values
    ​​of all non-null lamatris used game theory is used otherwise a selection to
    try to encourage the elimination of null values​​.

    @param mtx: a matrix where each row represents the strategies of player 1,
    each column the strategies of player 2, and each cell has the advantage that
    the strategies of the row over the column (negative values ​​imply advantages
    in the column on the rows and ​​None values siginifca that these strategies
    never encountered)

    @param plyr: Two values are accepted. "0" for the "row" player and "1" for the
    column player.

    @return An integer that represents the strategy n-th of row or column

    """
    probs = None
    solver = None

    # Verify if matrix are empty
    if not any([l.count(None) for l in mtx]):
        probs = game_theory_solve(mtx)[plyr]
        solver = "game_theory_solver"
    else:
        probs = liar_solve(mtx)[plyr]
        solver = "liar_solver"

    # Calculate the cumulative frequency
    for idx, prob in enumerate(probs):
        cum = probs[idx-1][1] if idx != 0 else 0
        probs[idx] = (idx, prob + cum)

    # Find E*
    while True:
        roll = random.randint(1, 100)
        for stg, prob in probs:
            if roll < prob:
                return stg, solver

def update_mtx(mtx, row, column, plyr):
    """Update the matrix to improve decisions.

    @param mtx: The matrif to update.

    @param row: The index of the row strategy.

    @param column: The index of the column strategy.

    @patam player: Two values are accepted.
        0: for the "row" player the mtx[row][column] must be incremented in 1
        1: for the "column" player the mtx[row][column] must be decremented in 1

    """
    value = mtx[row][column] or 0
    if plyr == 0:
        value += 1
    elif plyr == 1:
        value -= 1
    else:
        raise ValueError("'plyr' must be '0' or '1', found: %s" % str(plyr))
    mtx[row][column] = value

#===============================================================================
# TEST
#===============================================================================

FULL_MTX = ([4,    0,     1],
            [100,  23,    2],
            [5,    23,    1],
            [1,    1,     0])

INC_MTX = ([4,      0,    1],
            [100,   None, 2],
            [5,     23,    None],
            [None, 1,     None])

print best_strategy(FULL_MTX, 0)
print best_strategy(INC_MTX, 1)

Aplicación de Teoría de Juegos a una Inteligencia Artificial de un Browser Game Persistente de Estrategia

Deja un comentario

Tu dirección de correo electrónico no será publicada. Los campos necesarios están marcados *

Puedes usar las siguientes etiquetas y atributos HTML: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>