framework

2022-11-19 01:23:49 -08:00 · 2022-11-19 01:23:49 -08:00 · dae6c6f476
parent d361ac0372
commit dae6c6f476
9 changed files with 1657 additions and 0 deletions
--- a/agent.py
+++ b/agent.py
@ -0,0 +1,28 @@
 import random
 class Agent:
    """Baseline player that picks uniformly at random among the legal moves.

    The agents in agents/osawa.py derive from this class.
    """

    def __init__(self, name, pnr):
        # ``pnr`` is accepted for signature compatibility but is not stored here.
        self.explanation = []
        self.name = name

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Return one element of ``valid_actions`` chosen at random.

        All other arguments are ignored by this baseline implementation.
        """
        chosen = random.choice(valid_actions)
        return chosen

    def inform(self, action, player):
        """Notification hook for performed actions; the baseline ignores it."""
        return None

    def get_explanation(self):
        """Return the ``explanation`` list created in ``__init__``."""
        return self.explanation
 # Registry: agent id -> (display name, agent class or factory).
 agent_types = {}
 def register(id, name, agent):
    """Store ``agent`` under ``id`` together with its display ``name``.

    Registering the same id again replaces the earlier entry.
    """
    entry = (name, agent)
    agent_types[id] = entry
 # Register the baseline Agent class defined above under the id "random".
 register("random", "Random Player", Agent)
 def get(id):
    """Look up the ``(name, agent)`` pair registered under ``id``.

    Raises ``KeyError`` when nothing has been registered under that id.
    """
    name_and_agent = agent_types[id]
    return name_and_agent
 def make(id, *args, **kwargs):
    """Call the agent registered under ``id`` with the given arguments.

    Raises ``KeyError`` when nothing has been registered under that id.
    """
    factory = agent_types[id][1]
    return factory(*args, **kwargs)
 def ids():
    """Return the registered agent ids as a new list."""
    return [agent_id for agent_id in agent_types]
--- a/agents/init.py
+++ b/agents/init.py
--- a/agents/osawa.py
+++ b/agents/osawa.py
@ -0,0 +1,136 @@
 from hanabi import *
 import util
 import agent
 import random
 class InnerStatePlayer(agent.Agent):
    """Agent that acts on its own card knowledge and hints visible playable cards."""

    def __init__(self, name, pnr):
        # ``pnr`` is accepted for signature compatibility but is not stored.
        self.explanation = []
        self.name = name

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Choose the next move for player ``nr``.

        Preference order:
          1. play the first own card known to be playable;
          2. discard a random own card known to be useless;
          3. if hint tokens remain, hint the first playable card seen in
             another hand (color or rank with equal probability), otherwise
             give a random valid hint;
          4. discard at random.
        """
        discardable = []
        for index, card_knowledge in enumerate(knowledge[nr]):
            if util.is_playable(card_knowledge, board):
                return Action(PLAY, card_index=index)
            if util.is_useless(card_knowledge, board):
                discardable.append(index)
        if discardable:
            return Action(DISCARD, card_index=random.choice(discardable))

        # Without hint tokens the only remaining option is a blind discard.
        if not hints > 0:
            return random.choice(util.filter_actions(DISCARD, valid_actions))

        for player, hand in enumerate(hands):
            if player == nr:
                continue
            for card in hand:
                if not card.is_playable(board):
                    continue
                if random.random() < 0.5:
                    return Action(HINT_COLOR, player=player, color=card.color)
                return Action(HINT_RANK, player=player, rank=card.rank)

        color_hints = util.filter_actions(HINT_COLOR, valid_actions)
        rank_hints = util.filter_actions(HINT_RANK, valid_actions)
        return random.choice(color_hints + rank_hints)
 def format_hint(h):
    """Name a hint type: "color" for ``HINT_COLOR``, "rank" for anything else."""
    return "color" if h == HINT_COLOR else "rank"
 class OuterStatePlayer(agent.Agent):
    """Agent that also remembers which hint types it has recorded per hand slot.

    ``self.hints`` maps ``(player, card_index)`` to the set of hint types
    (``HINT_COLOR`` / ``HINT_RANK``) recorded for the card in that slot, so the
    same card is not hinted twice with the same kind of hint.
    """

    def __init__(self, name, pnr):
        self.name = name
        self.hints = {}
        self.pnr = pnr
        self.explanation = []

    def _record_hint(self, hands, player, hint_type, value):
        """Mark every card of ``player`` matching the hint as having received it."""
        for i, card in enumerate(hands[player]):
            shown = card.color if hint_type == HINT_COLOR else card.rank
            if shown == value:
                self.hints[(player, i)].add(hint_type)

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Choose the next move for player ``nr``.

        Preference order:
          1. play the first own card known to be playable;
          2. discard a random own card known to be useless;
          3. if hint tokens remain, give a hint type not yet recorded for a
             playable card in another hand (highest rank first);
          4. if hint tokens remain, give a random valid hint;
          5. discard at random.
        Every hint returned is recorded in ``self.hints`` before returning.
        """
        # Make sure every visible hand slot has a bookkeeping entry.
        for player, hand in enumerate(hands):
            for card_index, _ in enumerate(hand):
                if (player, card_index) not in self.hints:
                    self.hints[(player, card_index)] = set()

        # Build the explanation row from the recorded hints of the other players.
        known = [""] * 5
        for (pnr, card_index), given in self.hints.items():
            if pnr != nr:
                known[card_index] = str(list(map(format_hint, given)))
        self.explanation = [["hints received:"] + known]

        potential_discards = []
        for i, k in enumerate(knowledge[nr]):
            if util.is_playable(k, board):
                return Action(PLAY, card_index=i)
            if util.is_useless(k, board):
                potential_discards.append(i)
        if potential_discards:
            return Action(DISCARD, card_index=random.choice(potential_discards))

        if hints > 0:
            playables = []
            for player, hand in enumerate(hands):
                if player != nr:
                    for card_index, card in enumerate(hand):
                        if card.is_playable(board):
                            playables.append((player, card_index))
            # Stable sort, highest rank first.
            playables.sort(key=lambda which: -hands[which[0]][which[1]].rank)

            for player, card_index in playables:
                target = hands[player][card_index]
                already_given = self.hints[(player, card_index)]
                # Keep the COLOR-before-RANK order so seeded runs stay reproducible.
                options = [t for t in (HINT_COLOR, HINT_RANK) if t not in already_given]
                if not options:
                    # Both hint types were already recorded for this card; try the next one.
                    continue
                hint_type = random.choice(options)
                if hint_type == HINT_RANK:
                    self._record_hint(hands, player, HINT_RANK, target.rank)
                    return Action(HINT_RANK, player=player, rank=target.rank)
                self._record_hint(hands, player, HINT_COLOR, target.color)
                return Action(HINT_COLOR, player=player, color=target.color)

            # No useful targeted hint is left: give any valid hint at random.
            candidates = util.filter_actions(HINT_COLOR, valid_actions) + util.filter_actions(HINT_RANK, valid_actions)
            hintgiven = random.choice(candidates)
            if hintgiven.type == HINT_COLOR:
                self._record_hint(hands, hintgiven.player, HINT_COLOR, hintgiven.color)
            else:
                self._record_hint(hands, hintgiven.player, HINT_RANK, hintgiven.rank)
            return hintgiven

        return random.choice(util.filter_actions(DISCARD, valid_actions))

    def inform(self, action, player):
        """Keep the bookkeeping aligned after ``player`` plays or discards a card.

        The record of the removed slot is cleared and the record of every later
        slot moves down by one index; hint actions are ignored here.
        """
        if action.type in [PLAY, DISCARD]:
            removed = action.card_index
            if (player, removed) in self.hints:
                self.hints[(player, removed)] = set()
            for i in range(5):
                if (player, removed + i + 1) in self.hints:
                    self.hints[(player, removed + i)] = self.hints[(player, removed + i + 1)]
                    self.hints[(player, removed + i + 1)] = set()
 # Make both strategies available through the agent registry under the ids "inner" and "outer".
 agent.register("inner", "Inner State Player", InnerStatePlayer)
 agent.register("outer", "Outer State Player", OuterStatePlayer)
--- a/hanabi.py
+++ b/hanabi.py
@ -0,0 +1,293 @@
 import random
 import sys
 import copy
 import time
 # Color identifiers; each value is also the color's index into COLORNAMES.
 GREEN = 0
 YELLOW = 1
 WHITE = 2
 BLUE = 3
 RED = 4
 ALL_COLORS = [GREEN, YELLOW, WHITE, BLUE, RED]
 # Display names, in the same order as the color identifiers above.
 COLORNAMES = ["green", "yellow", "white", "blue", "red"]
 class Card:
    """A card identified by a color index and a rank.

    A card compares equal to another card and to a ``(color, rank)`` tuple,
    and it can be indexed or unpacked like such a pair.
    """

    def __init__(self, color, rank):
        self.color = color
        self.rank = rank

    def isColor(self, color):
        """Return whether this card has the given color."""
        return self.color == color

    def isRank(self, rank):
        """Return whether this card has the given rank."""
        return self.rank == rank

    def __eq__(self, other):
        """Compare by ``(color, rank)`` against a card-like object or a tuple.

        ``None`` is never equal; objects without ``color``/``rank`` attributes
        yield ``NotImplemented`` so that ``==`` evaluates to False instead of
        raising ``AttributeError``.
        """
        if other is None:
            return False
        if isinstance(other, tuple):
            return (self.color, self.rank) == other
        try:
            return (self.color, self.rank) == (other.color, other.rank)
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Consistent with __eq__: equal cards (and the equal tuple) hash alike.
        # Do not change color/rank while the card is stored in a set or dict.
        return hash((self.color, self.rank))

    def __getitem__(self, idx):
        # Index 0 is the color; every other index yields the rank.
        if idx == 0:
            return self.color
        return self.rank

    def __str__(self):
        return COLORNAMES[self.color] + " " + str(self.rank)

    def __repr__(self):
        return str((self.color, self.rank))

    def is_useless(self, board):
        """Return True when the stack of this color has already reached this rank."""
        return board[self.color].rank + 1 > self.rank

    def is_playable(self, board):
        """Return True when this card is the next rank for the stack of its color."""
        return board[self.color].rank + 1 == self.rank

    def __iter__(self):
        return iter([self.color, self.rank])
 # Copies per color of each rank: COUNTS[i] is the number of cards of rank i+1.
 COUNTS = [3,2,2,2,1]
 # semi-intelligently format cards in any format
 def f(something):
    """Recursively convert cards inside ``something`` into readable values.

    Lists are formatted element by element, dicts value by value, a ``Card``
    becomes its string form, and a 2-tuple is read as ``(color, rank)`` and
    returned with the color replaced by its name. Anything else is returned
    unchanged.
    """
    if isinstance(something, list):
        return [f(item) for item in something]
    if isinstance(something, dict):
        # Recurse with f on every value; calling the dict itself would raise TypeError.
        return {k: f(v) for (k, v) in something.items()}
    if isinstance(something, Card):
        return str(something)
    if isinstance(something, tuple) and len(something) == 2:
        return (COLORNAMES[something[0]], something[1])
    return something
 def make_deck():
    """Build a shuffled deck with COUNTS[rank-1] copies of each rank in every color."""
    deck = [
        Card(color, rank)
        for color in ALL_COLORS
        for rank, copies in enumerate(COUNTS, start=1)
        for _ in range(copies)
    ]
    random.shuffle(deck)
    return deck
 def initial_knowledge():
    """Return a fresh knowledge table: one independent copy of COUNTS per color."""
    return [list(COUNTS) for _ in ALL_COLORS]
 def hint_color(knowledge, color, truth):
    """Return a new knowledge table narrowed by a color hint.

    A color's row is copied when ``(col == color)`` equals ``truth`` and is
    replaced by zeros otherwise; ``knowledge`` itself is not modified.
    """
    narrowed = []
    for col in ALL_COLORS:
        row = knowledge[col]
        keep = truth == (col == color)
        narrowed.append(row[:] if keep else [0] * len(row))
    return narrowed
 def hint_rank(knowledge, rank, truth):
    """Return a new knowledge table narrowed by a rank hint.

    The count at index ``i`` (rank ``i + 1``) is kept when ``(i + 1 == rank)``
    equals ``truth`` and set to 0 otherwise; ``knowledge`` is not modified.
    """
    return [
        [cnt if truth == (i + 1 == rank) else 0 for i, cnt in enumerate(knowledge[col])]
        for col in ALL_COLORS
    ]
 # Action type identifiers, stored in Action.type.
 HINT_COLOR = 0
 HINT_RANK = 1
 PLAY = 2
 DISCARD = 3
 class Action(object):
    """A single move: a color hint, a rank hint, a play or a discard.

    Hints use ``player`` together with ``color`` or ``rank``; plays and
    discards use ``card_index``. Fields that do not apply stay ``None``.
    """

    def __init__(self, type, player=None, color=None, rank=None, card_index=None):
        self.type = type
        self.player = player
        self.color = color
        self.rank = rank
        self.card_index = card_index

    def __str__(self):
        kind = self.type
        if kind == HINT_COLOR:
            return "".join(["hints ", str(self.player), " about all their ", COLORNAMES[self.color], " cards"])
        if kind == HINT_RANK:
            return "".join(["hints ", str(self.player), " about all their ", str(self.rank), "s"])
        if kind == PLAY:
            return "".join(["plays card at index ", str(self.card_index)])
        if kind == DISCARD:
            return "".join(["discards card at index ", str(self.card_index)])
        # Unknown action types have no textual form.
        return None

    def __eq__(self, other):
        """Two actions are equal when all five fields match; ``None`` never matches."""
        if other is None:
            return False
        mine = (self.type, self.player, self.color, self.rank, self.card_index)
        theirs = (other.type, other.player, other.color, other.rank, other.card_index)
        return mine == theirs
 def format_card(card):
    """Return the ``str()`` rendering of ``card``."""
    rendered = str(card)
    return rendered
 def format_hand(hand):
    """Render every card of ``hand`` with ``format_card`` and join them with ", "."""
    return ", ".join(format_card(card) for card in hand)
 class Game(object):
    """One game of Hanabi: deck, hands, per-card knowledge, board and turn loop.

    State set up by ``__init__``:
      hits            -- mistakes still allowed (starts at 3; the game ends at 0)
      hints           -- hint tokens available (starts at 8 and never exceeds 8)
      board           -- top card of each color stack (rank 0 means empty)
      hands/knowledge -- per player, parallel lists of cards and count tables
      trash/played    -- cards discarded or misplayed / successfully played
      extra_turns     -- number of turns started with an empty deck
      format          -- when truthy, the deck, every move and the final score
                         are additionally written to ``log``
    """

    def __init__(self, players, log=sys.stdout, format=0):
        self.players = players
        self.hits = 3
        self.hints = 8
        self.current_player = 0
        self.board = [Card(c,0) for c in ALL_COLORS]
        self.played = []
        self.deck = make_deck()
        self.extra_turns = 0
        self.hands = []
        self.knowledge = []
        self.make_hands()
        self.trash = []
        self.log = log
        self.turn = 1
        self.format = format
        self.dopostsurvey = False
        self.study = False
        if self.format:
            print(self.deck, file=self.log)

    def make_hands(self):
        """Deal the initial hands: 5 cards each for fewer than 4 players, else 4."""
        handsize = 5 if len(self.players) < 4 else 4
        for pnr, _ in enumerate(self.players):
            self.hands.append([])
            self.knowledge.append([])
            for _ in range(handsize):
                self.draw_card(pnr)

    def draw_card(self, pnr=None):
        """Move the top deck card into the hand of ``pnr`` (default: current player).

        A fresh knowledge table is appended alongside the card. Does nothing
        when the deck is empty.
        """
        if pnr is None:
            pnr = self.current_player
        if not self.deck:
            return
        self.hands[pnr].append(self.deck[0])
        self.knowledge[pnr].append(initial_knowledge())
        del self.deck[0]

    def _visible_hands(self):
        """Return all hands with the current player's own hand replaced by ``[]``."""
        return [[] if i == self.current_player else h for i, h in enumerate(self.hands)]

    def perform(self, action):
        """Apply ``action`` on behalf of the current player.

        Every player is informed first, then the action is logged and the game
        state updated. The current player is not advanced here.
        """
        for p in self.players:
            p.inform(action, self.current_player)
        # Use the instance flag; the bare name ``format`` is the builtin and always truthy.
        if self.format:
            print("MOVE:", self.current_player, action.type, action.card_index, action.player, action.color, action.rank, file=self.log)
        if action.type == HINT_COLOR:
            self.hints -= 1
            print(self.players[self.current_player].name, "hints", self.players[action.player].name, "about all their", COLORNAMES[action.color], "cards", "hints remaining:", self.hints, file=self.log)
            print(self.players[action.player].name, "has", format_hand(self.hands[action.player]), file=self.log)
            for card,knowledge in zip(self.hands[action.player],self.knowledge[action.player]):
                if card.color == action.color:
                    # The card has the hinted color: rule out every other color.
                    for col, counts in enumerate(knowledge):
                        if col != card.color:
                            for r in range(len(counts)):
                                counts[r] = 0
                else:
                    # The card does not have the hinted color: rule that color out.
                    for r in range(len(knowledge[action.color])):
                        knowledge[action.color][r] = 0
        elif action.type == HINT_RANK:
            self.hints -= 1
            print(self.players[self.current_player].name, "hints", self.players[action.player].name, "about all their", action.rank, "hints remaining:", self.hints, file=self.log)
            print(self.players[action.player].name, "has", format_hand(self.hands[action.player]), file=self.log)
            for card,knowledge in zip(self.hands[action.player],self.knowledge[action.player]):
                if card.rank == action.rank:
                    # The card has the hinted rank: rule out every other rank.
                    for k in knowledge:
                        for i in range(len(COUNTS)):
                            if i+1 != card.rank:
                                k[i] = 0
                else:
                    # The card does not have the hinted rank: rule that rank out.
                    for k in knowledge:
                        k[action.rank-1] = 0
        elif action.type == PLAY:
            card = self.hands[self.current_player][action.card_index]
            print(self.players[self.current_player].name, "plays", format_card(card), end=' ', file=self.log)
            if self.board[card.color][1] == card.rank-1:
                self.board[card.color] = card
                self.played.append(card)
                if card.rank == 5:
                    # Completing a stack gives back one hint token, capped at 8.
                    self.hints += 1
                    self.hints = min(self.hints, 8)
                print("successfully! Board is now", format_hand(self.board), file=self.log)
            else:
                self.trash.append(card)
                self.hits -= 1
                print("and fails. Board was", format_hand(self.board), file=self.log)
            del self.hands[self.current_player][action.card_index]
            del self.knowledge[self.current_player][action.card_index]
            self.draw_card()
            print(self.players[self.current_player].name, "now has", format_hand(self.hands[self.current_player]), file=self.log)
        else:
            # Any other action type is handled as a discard.
            self.hints += 1
            self.hints = min(self.hints, 8)
            self.trash.append(self.hands[self.current_player][action.card_index])
            print(self.players[self.current_player].name, "discards", format_card(self.hands[self.current_player][action.card_index]), file=self.log)
            print("trash is now", format_hand(self.trash), file=self.log)
            del self.hands[self.current_player][action.card_index]
            del self.knowledge[self.current_player][action.card_index]
            self.draw_card()
            print(self.players[self.current_player].name, "now has", format_hand(self.hands[self.current_player]), file=self.log)

    def valid_actions(self):
        """List every action the current player may take right now.

        Playing or discarding any own card is always allowed; hints are offered
        only while hint tokens remain, and only for colors and ranks that occur
        in the targeted player's hand.
        """
        valid = []
        for i in range(len(self.hands[self.current_player])):
            valid.append(Action(PLAY, card_index=i))
            valid.append(Action(DISCARD, card_index=i))
        if self.hints > 0:
            for i, p in enumerate(self.players):
                if i != self.current_player:
                    for color in set([card[0] for card in self.hands[i]]):
                        valid.append(Action(HINT_COLOR, player=i, color=color))
                    for rank in set([card[1] for card in self.hands[i]]):
                        valid.append(Action(HINT_RANK, player=i, rank=rank))
        return valid

    def run(self, turns=-1):
        """Play until the game is done or the turn limit is reached; return the score.

        A negative ``turns`` means no limit; otherwise the loop stops once
        ``self.turn`` reaches ``turns``. Agents receive copies of the mutable
        state and are asked again until they return a valid action.
        """
        self.turn = 1
        while not self.done() and (turns < 0 or self.turn < turns):
            self.turn += 1
            if not self.deck:
                self.extra_turns += 1
            hands = self._visible_hands()
            valid = self.valid_actions()
            action = None
            while action not in valid:
                action = self.players[self.current_player].get_action(self.current_player, hands, copy.deepcopy(self.knowledge), self.trash[:], self.played[:], self.board[:], valid, self.hints, self.hits, len(self.deck))
                if action not in valid:
                    print("Tried to perform illegal action, retrying")
            self.perform(action)
            self.current_player += 1
            self.current_player %= len(self.players)
        print("Game done, hits left:", self.hits, file=self.log)
        points = self.score()
        print("Points:", points, file=self.log)
        return points

    def score(self):
        """Return the sum of the ranks on top of the five stacks."""
        return sum([card.rank for card in self.board])

    def single_turn(self):
        """Let the current player's agent take one turn, unless the game is done.

        Unlike ``run``, the agent receives the live state objects and its
        action is performed without being checked against ``valid_actions``.
        """
        if not self.done():
            if not self.deck:
                self.extra_turns += 1
            hands = self._visible_hands()
            action = self.players[self.current_player].get_action(self.current_player, hands, self.knowledge, self.trash, self.played, self.board, self.valid_actions(), self.hints, self.hits, len(self.deck))
            self.perform(action)
            self.current_player += 1
            self.current_player %= len(self.players)

    def external_turn(self, action):
        """Perform an externally chosen ``action`` for the current player, unless the game is done."""
        if not self.done():
            if not self.deck:
                self.extra_turns += 1
            self.perform(action)
            self.current_player += 1
            self.current_player %= len(self.players)

    def done(self):
        """Return True when the game is over.

        The game ends when every player has had a turn with an empty deck,
        when no hits are left, or when all five stacks have reached rank 5.
        """
        if self.extra_turns == len(self.players) or self.hits == 0:
            return True
        return all(card.rank == 5 for card in self.board)

    def finish(self):
        """Write the final score and close the log when ``format`` is set.

        The standard streams are left open so later output is not lost.
        """
        if self.format:
            print("Score", self.score(), file=self.log)
            if self.log is not sys.stdout and self.log is not sys.stderr:
                self.log.close()
--- a/httpui.py
+++ b/httpui.py
--- a/main.py
+++ b/main.py
@ -0,0 +1,85 @@
 from hanabi import Game
 import agent
 import random
 import os 
 import importlib
 import sys
 import math
 import argparse
 # Import every module found in the "agents" directory (relative to the working directory),
 # skipping __init__.py; modules such as agents/osawa.py register their agents on import.
 for f in os.listdir("agents"):
    if f.endswith(".py") and f != "__init__.py":
        importlib.import_module("agents."+f[:-3])
 class NullStream(object):
    """Write-only sink that discards everything passed to ``write``."""

    def write(self, *args):
        return None
 # Display names handed to the players by index; main() shuffles this list in place.
 names = ["Shangdi", "Nu Wa", "Yu Di", "Tian", "Pangu"]
 def main(n=100, seed=0, agents=None):
    """Simulate ``n`` games between the given agent types and print statistics.

    n      -- number of games; game output goes to stdout only when n < 6
    seed   -- base random seed (game i uses ``seed + i + 1``); ``None`` leaves
              the random generator unseeded
    agents -- list of registered agent ids; padded with "random" up to two
              players. The caller's list is never modified.

    A game that raises is reported with a traceback and left out of the
    statistics.
    """
    random.shuffle(names)
    # Work on a private copy so the padding below cannot leak into the caller's list.
    agents = list(agents) if agents else []
    while len(agents) < 2:
        agents.append("random")
    out = sys.stdout if n < 6 else NullStream()
    pts = []
    for game_index in range(n):
        # Progress is reported for every 100th game.
        announce = (game_index+1)%100 == 0
        if announce:
            print("Starting game", game_index+1)
        if seed is not None:
            random.seed(seed+game_index+1)
        # A separate index name keeps the game counter intact for the score report below.
        players = [agent.get(a)[1](names[pnr], pnr) for pnr, a in enumerate(agents)]
        g = Game(players, out)
        try:
            pts.append(g.run())
            if announce:
                print("score", pts[-1])
        except Exception:
            import traceback
            traceback.print_exc()
    if n < 10:
        print("Scores:", pts)
    if n > 1 and pts:
        mean = sum(pts)*1.0/len(pts)
        print("mean: %.2f"%(mean))
        if len(pts) > 1:
            # The sample standard deviation needs at least two finished games.
            ssqs = [(p-mean)**2 for p in pts]
            print("stddev: %.2f"%(math.sqrt(sum(ssqs)/(len(pts)-1))))
        print("range", min(pts), max(pts))
 if __name__ == "__main__":
    # Command-line entry point: either list the registered agents or simulate games.
    parser = argparse.ArgumentParser(description='Simulate several games of Hanabi.')
    parser.add_argument('agents', metavar='A', nargs='*',
                        help='the agent types that should play (minimum 2)')
    parser.add_argument('--list', dest='list', action='store_true',
                        help='Show available agent types and quit')
    parser.add_argument('-n', '--count', '--games', dest='n',
                        type=int, default=100, help='How many games should the agents play?')
    parser.add_argument('-s', '--seed', dest='seed',
                        type=int, default=0, help='The random seed to be used')
    parser.add_argument('-r', '--random', dest='rand', action='store_true',
                        help='Do not use random seed; make games truly random.')
    args = parser.parse_args()
    if not args.list:
        # -r/--random disables seeding altogether.
        seed = None if args.rand else args.seed
        main(args.n, seed, args.agents)
    else:
        print("Available agents:")
        for agent_id in agent.ids():
            print("  %s: %s"%(agent_id, agent.get(agent_id)[0]))
--- a/serverconf.py
+++ b/serverconf.py
@ -0,0 +1,3 @@
 # Host address and port number; presumably where the HTTP UI (httpui.py) listens - TODO confirm.
 HOST_NAME = "127.0.0.1"
 PORT_NUMBER = 31337
--- a/tutorial.py
+++ b/tutorial.py
@ -0,0 +1,38 @@
 intro = """
 <h1>Short introduction to the Hanabi User Interface</h1>
 <table width="800px"><tr><td>
 <p>You will play a game of the card game Hanabi in a browser-based implementation. This tutorial will describe how to use the user interface, so please read it carefully.</p>
 <p>If you are not familiar with the card game Hanabi or need a refresher on the rules, you can read a short summary at the end of this page or refer to the official rules <a href="http://www.regatuljocurilor.ro/product_extra_files/HanabiRules-EnglishTransofFrench-1page.pdf" target="_blank">here</a>
 <p>The user interface you will be playing in looks like this:
 <p><img src="/hanabiui.png"/></p>
 <p>On the left you can see how many hint tokens are currently available, how many mistakes have been made so far and how many cards are left in the deck. If you reach 2 mistakes, as in the picture above, the number will turn red and be shown in bold to draw your attention to that fact. 
 </p>
 <p>In the center you see the AI player's hand on top, the board in the center and a representation of your hand (which you can't see) on the bottom. To play or discard one of your own cards, click the link
 on that card to do so. To hint the AI about all their cards of a particular color or rank, click the link on any card that matches that color or rank. For example, if you click the "hint color" link on the yellow 4, they
 will be hinted about all their yellow cards. <b>Note that the "Hint Color" and "Hint Rank" links will not be shown when no hint tokens are available.</b> Underneath each card in your hand you can see what you have been told about that card in the past. The same goes for the cards in the AI's hand. Note that you will not be reminded
 of information that you can infer from a hint. In particular, if you are told that some of your cards are 1s, they will be marked as such, but the other cards will <b>not</b> be marked as "not 1s". </p>
 <p>On the right side of the screen, finally, you will see the last actions that happened, with the newest action on top of the list. The cards that were affected by the last two actions (your last action and the last action the AI performed) will also be highlighted in red. For hints that were given this will appear as a red frame around the card or cards in a player's hand. For a card that was successfully played a red frame will be drawn around the stack on the board on which the card was played. Otherwise, if a card is unsuccessfully played or discarded, that card will be highlighted in red in the list of cards in the trash.</p>
 <p>When you click "Continue" you and the AI will immediately be dealt cards. When you click "Start Game" the AI will take the first turn, and then it is your turn.
 </td></tr></table>
 """
 # HTML fragment with a short summary of the Hanabi rules.
 summary = """
 <h2>Hanabi rules summary</h2>
 <table width="800px"><tr><td>
 <p>Hanabi is a cooperative card game in which you don't see your own cards, but you see the cards the other player has in their hand. There are 5 colors in the game: yellow, red, blue, green and white, and each color has cards in the ranks 1 to 5. Each color has three copies of the 1s, two copies of the 2s, 3s and 4s and only a single copy of the 5s.</p>
 <p>The goal of the game is to play the cards on five stacks, one for each color, in ascending order, starting with the 1s. At the end of the game you will receive one point for each card that was successfully played.</p>
 <p>On your turn you have the choice between one of three actions:
 <ul>
 <li> <b>Play a card:</b> Choose a card from your hand and play it. If it is the next card in ascending order for any stack on the board, it will be placed there, otherwise it will be counted as a mistake and the card will be put in the trash.
 <li> <b>Give a hint:</b> Tell the other player about <b>all</b> cards in their hand that have a particular color or a particular rank. For example, you can tell the other player which of their cards are yellow, but you have to tell them all their yellow cards. Likewise, if you want to tell the other player which of their cards are 3s, you have to tell them all their 3s. You also cannot tell them that they have zero cards of a particular color or rank. Giving a hint consumes one hint token, of which there are initially 8.
 <li> <b>Discard a card:</b> Choose a card from your hand and put it in the trash pile. This will regenerate one hint token, but you can never have more than the initial 8.
 </ul>
 The game lasts until either the last card is drawn, plus one additional turn for each player, or until 3 mistakes have been made.
 </td></tr></table>
 """
--- a/util.py
+++ b/util.py
@ -0,0 +1,66 @@
 from hanabi import *
 def is_playable(knowledge, board):
    """Return True when every card still possible under ``knowledge`` is playable on ``board``.

    Vacuously True when no card is possible.
    """
    candidates = get_possible(knowledge)
    return all(card.is_playable(board) for card in candidates)
 def maybe_playable(knowledge, board):
    """Return True when at least one card still possible under ``knowledge`` is playable on ``board``."""
    candidates = get_possible(knowledge)
    return any(card.is_playable(board) for card in candidates)
 def is_useless(knowledge, board):
    """Return True when every card still possible under ``knowledge`` is useless on ``board``.

    Vacuously True when no card is possible.
    """
    candidates = get_possible(knowledge)
    return all(card.is_useless(board) for card in candidates)
 def maybe_useless(knowledge, board):
    """Return True when at least one card still possible under ``knowledge`` is useless on ``board``."""
    candidates = get_possible(knowledge)
    return any(card.is_useless(board) for card in candidates)
 def has_property(predicate, knowledge):
    """Return True when ``predicate`` holds for every card still possible under ``knowledge``."""
    candidates = get_possible(knowledge)
    return all(predicate(card) for card in candidates)
 def may_have_property(predicate, knowledge):
    """Return True when ``predicate`` holds for at least one card still possible under ``knowledge``."""
    candidates = get_possible(knowledge)
    return any(predicate(card) for card in candidates)
 def probability(predicate, knowledge):
    """Return the count-weighted share of cards in ``knowledge`` that satisfy ``predicate``.

    Each (color, rank) entry contributes its count to the total and, when
    the predicate accepts that card, to the matching sum as well. Raises
    ``ZeroDivisionError`` when all counts are zero.
    """
    matching = 0.0
    total = 0.0
    for col in ALL_COLORS:
        for rank, cnt in enumerate(knowledge[col], start=1):
            if predicate(Card(col, rank)):
                matching += cnt
            total += cnt
    return matching/total
 def playable(board):
    """Return a one-argument predicate telling whether a card is playable on ``board``."""
    return lambda card: card.is_playable(board)
 def useless(board):
    """Return a one-argument predicate telling whether a card is useless on ``board``."""
    return lambda card: card.is_useless(board)
 def has_rank(rank):
    """Return a predicate that accepts cards whose ``rank`` equals the given one."""
    return lambda card: card.rank == rank
 def has_color(color):
    """Return a predicate that accepts cards whose ``color`` equals the given one."""
    return lambda card: card.color == color
 def get_possible(knowledge):
    """List a ``Card`` for every (color, rank) entry of ``knowledge`` with a positive count.

    Cards are ordered by color, then by ascending rank.
    """
    return [
        Card(col, rank)
        for col in ALL_COLORS
        for rank, cnt in enumerate(knowledge[col], start=1)
        if cnt > 0
    ]
 def filter_actions(type, actions):
    """Return the actions whose ``type`` attribute equals ``type``, in their original order."""
    wanted = type
    return list(filter(lambda act: act.type == wanted, actions))
		`@ -0,0 +1,3 @@`

							`HOST_NAME = "127.0.0.1"`
							`PORT_NUMBER = 31337`