framework

This commit is contained in:
JH159753 2022-11-19 01:23:49 -08:00
parent d361ac0372
commit dae6c6f476
9 changed files with 1657 additions and 0 deletions

28
agent.py Normal file
View File

@ -0,0 +1,28 @@
import random
class Agent:
    """Baseline player that picks uniformly at random among the valid actions.

    Serves as the base class for the other agents, which override
    ``get_action`` and, where they track state, ``inform``.
    """

    def __init__(self, name, pnr):
        """Remember the player's display name and player number.

        name: label stored on the agent as ``name``.
        pnr: the player number handed in by whoever creates the agent.
        """
        self.name = name
        # Keep the player number instead of silently discarding it, so every
        # agent exposes the same attributes regardless of subclass.
        self.pnr = pnr
        self.explanation = []

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Return one entry of ``valid_actions`` chosen uniformly at random.

        All other arguments are ignored by this baseline implementation.
        """
        return random.choice(valid_actions)

    def inform(self, action, player):
        """Notification hook for a performed action; the baseline ignores it."""

    def get_explanation(self):
        """Return the agent's current ``explanation`` list."""
        return self.explanation
# Registry mapping an agent id to its (display name, agent factory) pair.
agent_types = dict()


def register(id, name, agent):
    """Store the pair ``(name, agent)`` under ``id``, replacing any earlier entry."""
    entry = (name, agent)
    agent_types[id] = entry
# Make the baseline Agent (uniformly random moves) available under the id "random".
register("random", "Random Player", Agent)
def get(id):
    """Look up ``id`` in the registry and return its ``(name, agent)`` pair.

    Raises KeyError when nothing was registered under ``id``.
    """
    entry = agent_types[id]
    return entry
def make(id, *args, **kwargs):
    """Instantiate the agent registered under ``id``.

    All remaining positional and keyword arguments are forwarded unchanged to
    the registered factory.  Raises KeyError for an unknown ``id``.
    """
    factory = agent_types[id][1]
    return factory(*args, **kwargs)
def ids():
    """Return a new list holding every registered agent id."""
    return list(agent_types)

0
agents/__init__.py Normal file
View File

136
agents/osawa.py Normal file
View File

@ -0,0 +1,136 @@
from hanabi import *
import util
import agent
import random
class InnerStatePlayer(agent.Agent):
    """Agent that decides from its own card knowledge and the hands it can see.

    Each turn it tries, in order: play a card it knows is playable, discard a
    card it knows is useless, hint a playable card in another hand, give any
    available hint, and finally discard at random.
    """

    def __init__(self, name, pnr):
        """Store the display name; ``pnr`` is accepted but not used here."""
        self.name = name
        self.explanation = []

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Choose this player's action for the current turn.

        nr: index of this player in ``hands`` and ``knowledge``.
        hands: per-player lists of visible cards.
        knowledge: per-player, per-card knowledge tables.
        board: current top card of each color stack.
        valid_actions: the actions the game will accept this turn.
        hints: number of hint tokens currently available.
        The remaining arguments are not consulted.
        """
        potential_discards = []
        for i, k in enumerate(knowledge[nr]):
            if util.is_playable(k, board):
                return Action(PLAY, card_index=i)
            if util.is_useless(k, board):
                potential_discards.append(i)

        if potential_discards:
            return Action(DISCARD, card_index=random.choice(potential_discards))

        if hints > 0:
            for player, hand in enumerate(hands):
                if player == nr:
                    continue
                for card in hand:
                    if card.is_playable(board):
                        # Coin flip between revealing the color and the rank.
                        if random.random() < 0.5:
                            return Action(HINT_COLOR, player=player, color=card.color)
                        return Action(HINT_RANK, player=player, rank=card.rank)

            # Kept under its own name so the token count in ``hints`` is not
            # overwritten by the list of candidate actions.
            hint_actions = util.filter_actions(HINT_COLOR, valid_actions) + util.filter_actions(HINT_RANK, valid_actions)
            # With no hint action on offer, fall through to a discard instead
            # of letting random.choice fail on an empty list.
            if hint_actions:
                return random.choice(hint_actions)

        return random.choice(util.filter_actions(DISCARD, valid_actions))
def format_hint(h):
    """Return the display word for a hint type: "color" for HINT_COLOR, else "rank"."""
    return "color" if h == HINT_COLOR else "rank"
class OuterStatePlayer(agent.Agent):
    """Agent that additionally remembers which hints it has already given.

    ``self.hints`` maps ``(player, card_index)`` to the set of hint types
    (HINT_COLOR / HINT_RANK) this agent has given about that card, so it can
    avoid repeating a hint about a playable card.
    """

    def __init__(self, name, pnr):
        """Store the display name and player number and start with no hints recorded."""
        self.name = name
        self.hints = {}
        self.pnr = pnr
        self.explanation = []

    def _record_hint(self, hands, player, hint_type, value):
        """Mark every card of ``player`` that a hint of ``hint_type``/``value`` touches."""
        for i, card in enumerate(hands[player]):
            shown = card.color if hint_type == HINT_COLOR else card.rank
            if shown == value:
                self.hints[(player, i)].add(hint_type)

    def get_action(self, nr, hands, knowledge, trash, played, board, valid_actions, hints, hits, cards_left):
        """Choose this player's action for the current turn.

        nr: index of this player in ``hands`` and ``knowledge``.
        hands: per-player lists of visible cards.
        knowledge: per-player, per-card knowledge tables.
        board: current top card of each color stack.
        valid_actions: the actions the game will accept this turn.
        hints: number of hint tokens currently available.
        The remaining arguments are not consulted.
        """
        # Make sure every visible card slot has a bookkeeping entry.
        for player, hand in enumerate(hands):
            for card_index, _ in enumerate(hand):
                if (player, card_index) not in self.hints:
                    self.hints[(player, card_index)] = set()

        # Summarise the recorded hints for every slot that is not our own.
        known = [""] * 5
        for (pnr, card_index), told in self.hints.items():
            if pnr != nr:
                known[card_index] = str([format_hint(h) for h in told])
        self.explanation = [["hints received:"] + known]

        potential_discards = []
        for i, k in enumerate(knowledge[nr]):
            if util.is_playable(k, board):
                return Action(PLAY, card_index=i)
            if util.is_useless(k, board):
                potential_discards.append(i)

        if potential_discards:
            return Action(DISCARD, card_index=random.choice(potential_discards))

        playables = []
        for player, hand in enumerate(hands):
            if player != nr:
                for card_index, card in enumerate(hand):
                    if card.is_playable(board):
                        playables.append((player, card_index))

        # Highest rank first.
        playables.sort(key=lambda which: -hands[which[0]][which[1]].rank)

        if hints > 0:
            for player, card_index in playables:
                target = hands[player][card_index]
                told = self.hints[(player, card_index)]
                # Hint types not yet given about this card, in a fixed order.
                untold = [t for t in (HINT_COLOR, HINT_RANK) if t not in told]
                if not untold:
                    continue
                t = random.choice(untold)
                if t == HINT_RANK:
                    self._record_hint(hands, player, HINT_RANK, target.rank)
                    return Action(HINT_RANK, player=player, rank=target.rank)
                self._record_hint(hands, player, HINT_COLOR, target.color)
                return Action(HINT_COLOR, player=player, color=target.color)

            # Kept under its own name so the token count in ``hints`` is not
            # overwritten by the list of candidate actions.
            hint_actions = util.filter_actions(HINT_COLOR, valid_actions) + util.filter_actions(HINT_RANK, valid_actions)
            # With no hint action on offer, fall through to a discard instead
            # of letting random.choice fail on an empty list.
            if hint_actions:
                hintgiven = random.choice(hint_actions)
                if hintgiven.type == HINT_COLOR:
                    self._record_hint(hands, hintgiven.player, HINT_COLOR, hintgiven.color)
                else:
                    self._record_hint(hands, hintgiven.player, HINT_RANK, hintgiven.rank)
                return hintgiven

        return random.choice(util.filter_actions(DISCARD, valid_actions))

    def inform(self, action, player):
        """Update the hint bookkeeping after ``player`` performed ``action``.

        When a card is played or discarded, its entry is cleared and the
        entries of the slots after it are shifted down by one position.
        """
        if action.type in [PLAY, DISCARD]:
            if (player, action.card_index) in self.hints:
                self.hints[(player, action.card_index)] = set()
            for i in range(5):
                if (player, action.card_index + i + 1) in self.hints:
                    self.hints[(player, action.card_index + i)] = self.hints[(player, action.card_index + i + 1)]
                    self.hints[(player, action.card_index + i + 1)] = set()
# Expose both agents through the shared registry under the ids "inner" and "outer".
agent.register("inner", "Inner State Player", InnerStatePlayer)
agent.register("outer", "Outer State Player", OuterStatePlayer)

293
hanabi.py Normal file
View File

@ -0,0 +1,293 @@
import random
import sys
import copy
import time
# Color identifiers.  Each value doubles as an index into COLORNAMES, the
# board, and the per-card knowledge tables.
GREEN = 0
YELLOW = 1
WHITE = 2
BLUE = 3
RED = 4
# Every color identifier, in index order.
ALL_COLORS = [GREEN, YELLOW, WHITE, BLUE, RED]
# Display names, indexed by color identifier.
COLORNAMES = ["green", "yellow", "white", "blue", "red"]
class Card:
    """A Hanabi card identified by a color identifier and a rank.

    A card compares equal to another card with the same color and rank and to
    the plain tuple ``(color, rank)``; it hashes like that tuple, so cards can
    be used in sets and as dictionary keys.
    """

    def __init__(self, color, rank):
        self.color = color
        self.rank = rank

    def isColor(self, color):
        """Return True when the card has the given color."""
        return self.color == color

    def isRank(self, rank):
        """Return True when the card has the given rank."""
        return self.rank == rank

    def __eq__(self, other):
        if other is None:
            return False
        if isinstance(other, tuple):
            return (self.color, self.rank) == other
        try:
            return (self.color, self.rank) == (other.color, other.rank)
        except AttributeError:
            # Not card-like: let Python fall back to its default (unequal)
            # instead of raising from a plain comparison.
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make cards unhashable; hash like the
        # (color, rank) tuple the card compares equal to.
        return hash((self.color, self.rank))

    def __getitem__(self, idx):
        """Return the color for index 0 and the rank for any other index."""
        if idx == 0:
            return self.color
        return self.rank

    def __str__(self):
        return COLORNAMES[self.color] + " " + str(self.rank)

    def __repr__(self):
        return str((self.color, self.rank))

    def is_useless(self, board):
        """Return True when the stack of this color already reached this rank.

        board: sequence indexed by color whose entries expose ``rank``.
        """
        return board[self.color].rank + 1 > self.rank

    def is_playable(self, board):
        """Return True when this card is the next rank for its color stack.

        board: sequence indexed by color whose entries expose ``rank``.
        """
        return board[self.color].rank + 1 == self.rank

    def __iter__(self):
        """Iterate over color, then rank (enables ``color, rank = card``)."""
        return iter([self.color, self.rank])
# Number of copies of each rank within one color: index i holds the count for rank i + 1.
COUNTS = [3,2,2,2,1]
# semi-intelligently format cards in any format
def f(something):
    """Return a display-friendly copy of ``something``.

    Lists and dict values are converted recursively, a Card becomes its
    string form, and a 2-tuple is read as ``(color, rank)`` with the color
    replaced by its name.  Anything else is returned unchanged.
    """
    if isinstance(something, list):
        return [f(item) for item in something]
    if isinstance(something, dict):
        # Recurse into the values; calling the dict itself here raised
        # "TypeError: 'dict' object is not callable".
        return {k: f(v) for (k, v) in something.items()}
    if isinstance(something, Card):
        return str(something)
    if isinstance(something, tuple) and len(something) == 2:
        return (COLORNAMES[something[0]], something[1])
    return something
def make_deck():
    """Build a complete deck and return it in shuffled order.

    For every color, each rank (starting at 1) appears as many times as
    COUNTS prescribes.  The shuffle uses the module-level ``random`` state.
    """
    cards = [
        Card(color, number)
        for color in ALL_COLORS
        for number, copies in enumerate(COUNTS, start=1)
        for _ in range(copies)
    ]
    random.shuffle(cards)
    return cards
def initial_knowledge():
    """Return a fresh knowledge table: one independent copy of COUNTS per color."""
    return [list(COUNTS) for _ in ALL_COLORS]
def hint_color(knowledge, color, truth):
    """Return a new knowledge table narrowed by a color statement.

    With ``truth`` true the card is ``color``, so only that row survives;
    with ``truth`` false the card is not ``color``, so only that row is
    zeroed.  ``knowledge`` itself is left untouched.
    """
    updated = []
    for col in ALL_COLORS:
        row = knowledge[col]
        survives = truth == (col == color)
        updated.append(row[:] if survives else [0 for _ in row])
    return updated
def hint_rank(knowledge, rank, truth):
    """Return a new knowledge table narrowed by a rank statement.

    With ``truth`` true only the column for ``rank`` keeps its counts; with
    ``truth`` false only that column is zeroed.  ``knowledge`` is not modified.
    """
    return [
        [cnt if truth == (pos + 1 == rank) else 0 for pos, cnt in enumerate(knowledge[col])]
        for col in ALL_COLORS
    ]
# Action type identifiers, stored in Action.type.
HINT_COLOR = 0
HINT_RANK = 1
PLAY = 2
DISCARD = 3
class Action(object):
    """One move in the game.

    type: one of HINT_COLOR, HINT_RANK, PLAY or DISCARD.
    player: target player index (hints only).
    color / rank: the hinted color or rank (hints only).
    card_index: position in the acting player's hand (play/discard only).

    Actions compare by value and hash consistently with that comparison.
    """

    def __init__(self, type, player=None, color=None, rank=None, card_index=None):
        self.type = type
        self.player = player
        self.color = color
        self.rank = rank
        self.card_index = card_index

    def _key(self):
        """Return the tuple of fields that defines equality and hashing."""
        return (self.type, self.player, self.color, self.rank, self.card_index)

    def __str__(self):
        if self.type == HINT_COLOR:
            return "hints " + str(self.player) + " about all their " + COLORNAMES[self.color] + " cards"
        if self.type == HINT_RANK:
            return "hints " + str(self.player) + " about all their " + str(self.rank)+"s"
        if self.type == PLAY:
            return "plays card at index " + str(self.card_index)
        if self.type == DISCARD:
            return "discards card at index " + str(self.card_index)
        # __str__ must return a string; falling off the end returned None and
        # made str(action) raise TypeError for an unrecognised type.
        return "performs unknown action type " + str(self.type)

    def __eq__(self, other):
        if other is None:
            return False
        try:
            return self._key() == (other.type, other.player, other.color, other.rank, other.card_index)
        except AttributeError:
            # Not action-like: defer to Python's default (unequal) instead of
            # raising from a plain comparison or membership test.
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make actions unhashable.
        return hash(self._key())
def format_card(card):
    """Return the string form of ``card``."""
    return "%s" % (card,)
def format_hand(hand):
    """Return the cards of ``hand`` formatted one by one and joined with ", "."""
    pieces = [format_card(card) for card in hand]
    return ", ".join(pieces)
class Game(object):
    """Rules engine and mutable state for a single game of Hanabi.

    Holds the deck, the hands, what each player has been told about each card
    (``knowledge``), the board, the trash and the hint/hit counters, and
    applies the actions chosen by the player objects.
    """

    def __init__(self, players, log=sys.stdout, format=0):
        """Set up a fresh game and deal the opening hands.

        players: agent objects exposing ``name``, ``get_action`` and ``inform``.
        log: writable stream that receives the game transcript.
        format: when truthy, also log the shuffled deck, the "MOVE:" lines
            and the final score.
        """
        self.players = players
        self.hits = 3
        self.hints = 8
        self.current_player = 0
        self.board = [Card(c, 0) for c in ALL_COLORS]
        self.played = []
        self.deck = make_deck()
        self.extra_turns = 0
        self.hands = []
        self.knowledge = []
        self.make_hands()
        self.trash = []
        self.log = log
        self.turn = 1
        self.format = format
        self.dopostsurvey = False
        self.study = False
        if self.format:
            print(self.deck, file=self.log)

    def make_hands(self):
        """Deal the opening hands: 5 cards each below four players, otherwise 4."""
        handsize = 5 if len(self.players) < 4 else 4
        for i in range(len(self.players)):
            self.hands.append([])
            self.knowledge.append([])
            for _ in range(handsize):
                self.draw_card(i)

    def draw_card(self, pnr=None):
        """Move the top deck card into a hand, with blank knowledge about it.

        pnr: receiving player; defaults to the current player.  Does nothing
        when the deck is empty.
        """
        if pnr is None:
            pnr = self.current_player
        if not self.deck:
            return
        self.hands[pnr].append(self.deck.pop(0))
        self.knowledge[pnr].append(initial_knowledge())

    def _replace_card(self, card_index):
        """Remove a card from the current player's hand, draw, and log the new hand."""
        del self.hands[self.current_player][card_index]
        del self.knowledge[self.current_player][card_index]
        self.draw_card()
        print(self.players[self.current_player].name, "now has", format_hand(self.hands[self.current_player]), file=self.log)

    def perform(self, action):
        """Apply ``action`` for the current player and log what happened.

        Every player is informed first.  Any action type other than a hint
        or a play is handled as a discard.
        """
        for p in self.players:
            p.inform(action, self.current_player)
        # Test the instance setting: the bare name ``format`` is the builtin
        # function, which is always truthy.
        if self.format:
            print("MOVE:", self.current_player, action.type, action.card_index, action.player, action.color, action.rank, file=self.log)
        if action.type == HINT_COLOR:
            self.hints -= 1
            print(self.players[self.current_player].name, "hints", self.players[action.player].name, "about all their", COLORNAMES[action.color], "cards", "hints remaining:", self.hints, file=self.log)
            print(self.players[action.player].name, "has", format_hand(self.hands[action.player]), file=self.log)
            for card, knowledge in zip(self.hands[action.player], self.knowledge[action.player]):
                if card.color == action.color:
                    # The card has the hinted color: rule out every other color.
                    for col, counts in enumerate(knowledge):
                        if col != card.color:
                            counts[:] = [0] * len(counts)
                else:
                    # The card does not have the hinted color: rule that color out.
                    hinted = knowledge[action.color]
                    hinted[:] = [0] * len(hinted)
        elif action.type == HINT_RANK:
            self.hints -= 1
            print(self.players[self.current_player].name, "hints", self.players[action.player].name, "about all their", action.rank, "hints remaining:", self.hints, file=self.log)
            print(self.players[action.player].name, "has", format_hand(self.hands[action.player]), file=self.log)
            for card, knowledge in zip(self.hands[action.player], self.knowledge[action.player]):
                if card.rank == action.rank:
                    # The card has the hinted rank: rule out every other rank.
                    for counts in knowledge:
                        for idx in range(len(COUNTS)):
                            if idx + 1 != card.rank:
                                counts[idx] = 0
                else:
                    # The card does not have the hinted rank: rule that rank out.
                    for counts in knowledge:
                        counts[action.rank - 1] = 0
        elif action.type == PLAY:
            card = self.hands[self.current_player][action.card_index]
            print(self.players[self.current_player].name, "plays", format_card(card), end=' ', file=self.log)
            if self.board[card.color].rank == card.rank - 1:
                self.board[card.color] = card
                self.played.append(card)
                if card.rank == 5:
                    # Completing a stack returns a hint token, capped at 8.
                    self.hints += 1
                    self.hints = min(self.hints, 8)
                print("successfully! Board is now", format_hand(self.board), file=self.log)
            else:
                self.trash.append(card)
                self.hits -= 1
                print("and fails. Board was", format_hand(self.board), file=self.log)
            self._replace_card(action.card_index)
        else:
            self.hints += 1
            self.hints = min(self.hints, 8)
            self.trash.append(self.hands[self.current_player][action.card_index])
            print(self.players[self.current_player].name, "discards", format_card(self.hands[self.current_player][action.card_index]), file=self.log)
            print("trash is now", format_hand(self.trash), file=self.log)
            self._replace_card(action.card_index)

    def valid_actions(self):
        """Return every action the current player may take right now.

        Playing or discarding any own card is always allowed; hints are only
        offered while tokens remain, and only for colors and ranks that occur
        in the target player's hand.
        """
        valid = []
        for i in range(len(self.hands[self.current_player])):
            valid.append(Action(PLAY, card_index=i))
            valid.append(Action(DISCARD, card_index=i))
        if self.hints > 0:
            for i in range(len(self.players)):
                if i == self.current_player:
                    continue
                for color in {card.color for card in self.hands[i]}:
                    valid.append(Action(HINT_COLOR, player=i, color=color))
                for rank in {card.rank for card in self.hands[i]}:
                    valid.append(Action(HINT_RANK, player=i, rank=rank))
        return valid

    def _visible_hands(self):
        """Return all hands with the current player's own hand replaced by []."""
        return [[] if i == self.current_player else h for i, h in enumerate(self.hands)]

    def _count_extra_turn(self):
        """Count a turn taken after the deck ran out."""
        if not self.deck:
            self.extra_turns += 1

    def _next_player(self):
        """Pass the turn to the next player, wrapping around."""
        self.current_player = (self.current_player + 1) % len(self.players)

    def run(self, turns=-1):
        """Play until the game is done (or the turn limit is hit) and return the score.

        turns: stop once the turn counter reaches this value; negative means
            no limit.  Agents receive copies of the mutable state, and an
            agent returning an action outside ``valid_actions`` is asked again.
        """
        self.turn = 1
        while not self.done() and (turns < 0 or self.turn < turns):
            self.turn += 1
            self._count_extra_turn()
            hands = self._visible_hands()
            valid = self.valid_actions()
            action = None
            while action not in valid:
                action = self.players[self.current_player].get_action(self.current_player, hands, copy.deepcopy(self.knowledge), self.trash[:], self.played[:], self.board[:], valid, self.hints, self.hits, len(self.deck))
                if action not in valid:
                    print("Tried to perform illegal action, retrying")
            self.perform(action)
            self._next_player()
        print("Game done, hits left:", self.hits, file=self.log)
        points = self.score()
        print("Points:", points, file=self.log)
        return points

    def score(self):
        """Return the sum of the top ranks of all stacks on the board."""
        return sum(card.rank for card in self.board)

    def single_turn(self):
        """Let the current player's agent take exactly one turn, unless the game is done.

        Unlike ``run``, the live state objects (not copies) are handed to the
        agent and the returned action is not validated.
        """
        if self.done():
            return
        self._count_extra_turn()
        hands = self._visible_hands()
        action = self.players[self.current_player].get_action(self.current_player, hands, self.knowledge, self.trash, self.played, self.board, self.valid_actions(), self.hints, self.hits, len(self.deck))
        self.perform(action)
        self._next_player()

    def external_turn(self, action):
        """Apply an externally chosen ``action`` as the current player's turn, unless the game is done."""
        if self.done():
            return
        self._count_extra_turn()
        self.perform(action)
        self._next_player()

    def done(self):
        """Return True when the game is over.

        That is the case once every player had a turn after the deck ran out,
        when no hits are left, or when every stack has reached rank 5.
        """
        if self.extra_turns == len(self.players) or self.hits == 0:
            return True
        return all(card.rank == 5 for card in self.board)

    def finish(self):
        """Log the final score (when ``format`` is set) and close the log stream."""
        if self.format:
            print("Score", self.score(), file=self.log)
        # Never close the interpreter's standard streams, and tolerate log
        # objects that only implement write().
        close = getattr(self.log, "close", None)
        if close is not None and self.log is not sys.stdout and self.log is not sys.stderr:
            close()

1008
httpui.py Normal file

File diff suppressed because it is too large Load Diff

85
main.py Normal file
View File

@ -0,0 +1,85 @@
from hanabi import Game
import agent
import random
import os
import importlib
import sys
import math
import argparse
# Import every module in the "agents" directory so that each one can register
# its agent types.  os.listdir returns entries in arbitrary order, so the
# names are sorted to keep the registration order (and therefore the --list
# output) identical from run to run and across platforms.
for f in sorted(os.listdir("agents")):
    if f.endswith(".py") and f != "__init__.py":
        importlib.import_module("agents." + f[:-3])
class NullStream(object):
    """Write-only sink that discards everything, used to silence game logs.

    Besides ``write`` it offers no-op ``flush`` and ``close`` methods so it
    can stand in wherever a file-like log is flushed or closed.
    """

    def write(self, *args):
        """Accept and drop any output."""
        pass

    def flush(self):
        """Nothing is buffered, so there is nothing to flush."""
        pass

    def close(self):
        """Nothing is held open, so closing is a no-op."""
        pass
# Pool of display names for the players created in main().
names = ["Shangdi", "Nu Wa", "Yu Di", "Tian", "Pangu"]
def main(n=100, seed=0, agents=None):
    """Play ``n`` games with the given agent types and print score statistics.

    n: number of games.  Fewer than 6 games are logged in full to stdout;
        otherwise the transcripts are discarded.
    seed: base random seed; game number g (1-based) is seeded with
        ``seed + g``.  Pass None to leave the random state alone.
    agents: agent ids, one per player; padded with "random" up to two players.
    """
    import traceback

    # Shuffle a copy so repeated calls do not keep reordering the shared pool.
    player_names = list(names)
    random.shuffle(player_names)

    # Copy before padding so the caller's list is never modified.
    agents = list(agents) if agents else []
    while len(agents) < 2:
        agents.append("random")

    out = sys.stdout if n < 6 else NullStream()

    pts = []
    for game in range(n):
        if (game + 1) % 100 == 0:
            print("Starting game", game + 1)
        if seed is not None:
            random.seed(seed + game + 1)
        # A separate index is used for the seats: reusing the game counter
        # here clobbered it and kept the "score" progress line from printing.
        players = [agent.get(a)[1](player_names[seat], seat) for seat, a in enumerate(agents)]
        g = Game(players, out)
        try:
            pts.append(g.run())
            if (game + 1) % 100 == 0:
                print("score", pts[-1])
        except Exception:
            # A crashing game is reported but does not stop the series.
            traceback.print_exc()

    if n < 10:
        print("Scores:", pts)
    # Statistics need at least one finished game.
    if n > 1 and pts:
        mean = sum(pts) * 1.0 / len(pts)
        print("mean: %.2f" % (mean))
        # The sample standard deviation is undefined for a single score.
        if len(pts) > 1:
            ssqs = [(p - mean) ** 2 for p in pts]
            print("stddev: %.2f" % (math.sqrt(sum(ssqs) / (len(pts) - 1))))
        print("range", min(pts), max(pts))
if __name__ == "__main__":
    # Command-line entry point: either list the registered agents or run games.
    parser = argparse.ArgumentParser(description='Simulate several games of Hanabi.')
    parser.add_argument(
        'agents', metavar='A', nargs='*',
        help='the agent types that should play (minimum 2)',
    )
    parser.add_argument(
        '--list', dest='list', action='store_true', default=False,
        help='Show available agent types and quit',
    )
    parser.add_argument(
        '-n', '--count', '--games', dest='n', action='store', type=int, default=100,
        help='How many games should the agents play?',
    )
    parser.add_argument(
        '-s', '--seed', dest='seed', action='store', type=int, default=0,
        help='The random seed to be used',
    )
    parser.add_argument(
        '-r', '--random', dest='rand', action='store_true', default=False,
        help='Do not use random seed; make games truly random.',
    )
    args = parser.parse_args()

    if not args.list:
        # --random disables seeding altogether.
        chosen_seed = None if args.rand else args.seed
        main(args.n, chosen_seed, args.agents)
    else:
        print("Available agents:")
        for agent_id in agent.ids():
            print(" %s: %s"%(agent_id, agent.get(agent_id)[0]))

3
serverconf.py Normal file
View File

@ -0,0 +1,3 @@
# Host address and port number of the server.
# NOTE(review): presumably read by httpui.py when it starts listening — confirm.
HOST_NAME = "127.0.0.1"
PORT_NUMBER = 31337

38
tutorial.py Normal file
View File

@ -0,0 +1,38 @@
intro = """
<h1>Short introduction to the Hanabi User Interface</h1>
<table width="800px"><tr><td>
<p>You will play a game of the card game Hanabi in a browser-based implementation. This tutorial will describe how to use the user interface, so please read it carefully.</p>
<p>If you are not familiar with the card game Hanabi or need a refresher on the rules, you can read a short summary at the end of this page or refer to the official rules <a href="http://www.regatuljocurilor.ro/product_extra_files/HanabiRules-EnglishTransofFrench-1page.pdf" target="_blank">here</a>
<p>The user interface you will be playing in looks like this:
<p><img src="/hanabiui.png"/></p>
<p>On the left you can see how many hint tokens are currently available, how many mistakes have been made so far and how many cards are left in the deck. If you reach 2 mistakes, as in the picture above, the number will turn red and be shown in bold to draw your attention to that fact.
</p>
<p>In the center you see the AI player's hand on top, the board in the center and a representation of your hand (which you can't see) on the bottom. To play or discard one of your own cards, click the link
on that card to do so. To hint the AI about all their cards of a particular color or rank, click the link on any card that matches that color or rank. For example, if you click the "hint color" link on the yellow 4, they
will be hinted about all their yellow cards. <b>Note that the "Hint Color" and "Hint Rank" links will not be shown when no hint tokens are available.</b> Underneath each card in your hand you can see what you have been told about that card in the past. The same goes for the cards in the AI's hand. Note that you will not be reminded
of information that you can infer from a hint. In particular, if you are told that some of your cards are 1s, they will be marked as such, but the other cards will <b>not</b> be marked as "not 1s". </p>
<p>On the right side of the screen, finally, you will see the last actions that happened, with the newest action on top of the list. The cards that were affected by the last two actions (your last action and the last action the AI performed) will also be highlighted in red. For hints that were given this will appear as a red frame around the card or cards in a player's hand. For a card that was successfully played a red frame will be drawn around the stack on the board on which the card was played. Otherwise, if a card is unsuccessfully played or discarded, that card will be highlighted in red in the list of cards in the trash.</p>
<p>When you click "Continue" you and the AI will immediately be dealt cards. When you click "Start Game" the AI will take the first turn, and then it is your turn.
</td></tr></table>
"""
summary = """
<h2>Hanabi rules summary</h2>
<table width="800px"><tr><td>
<p>Hanabi is a cooperative card game in which you don't see your own cards, but you see the cards the other player has in their hand. There are 5 colors in the game: yellow, red, blue, green and white, and each color has cards in the ranks 1 to 5. Each color has three copies of the 1s, two copies of the 2s, 3s and 4s and only a single copy of the 5s.</p>
<p>The goal of the game is to play the cards on five stacks, one for each color, in ascending order, starting with the 1s. At the end of the game you will receive one point for each card that was successfully played.</p>
<p>On your turn you have the choice between one of three actions:
<ul>
<li> <b>Play a card:</b> Choose a card from your hand and play it. If it is the next card in ascending order for any stack on the board, it will be placed there, otherwise it will be counted as a mistake and the card will be put in the trash.
<li> <b>Give a hint:</b> Tell the other player about <b>all</b> cards in their hand that have a particular color or a particular rank. For example, you can tell the other player which of their cards are yellow, but you have to tell them all their yellow cards. Likewise, if you want to tell the other player which of their cards are 3s, you have to tell them all their 3s. You also cannot tell them that they have zero cards of a particular color or rank. Giving a hint consumes one hint token, of which there are initially 8.
<li> <b>Discard a card:</b> Choose a card from your hand and put it in the trash pile. This will regenerate one hint token, but you can never have more than the initial 8.
</ul>
The game lasts until either the last card is drawn, plus one additional turn for each player, or until 3 mistakes have been made.
</td></tr></table>
"""

66
util.py Normal file
View File

@ -0,0 +1,66 @@
from hanabi import *
def is_playable(knowledge, board):
    """Return True when every card still possible under ``knowledge`` is playable.

    Vacuously True when ``knowledge`` leaves no possible card at all.
    """
    return all(card.is_playable(board) for card in get_possible(knowledge))
def maybe_playable(knowledge, board):
    """Return True when at least one card still possible under ``knowledge`` is playable."""
    return any(card.is_playable(board) for card in get_possible(knowledge))
def is_useless(knowledge, board):
    """Return True when every card still possible under ``knowledge`` is useless.

    Vacuously True when ``knowledge`` leaves no possible card at all.
    """
    return all(card.is_useless(board) for card in get_possible(knowledge))
def maybe_useless(knowledge, board):
    """Return True when at least one card still possible under ``knowledge`` is useless."""
    return any(card.is_useless(board) for card in get_possible(knowledge))
def has_property(predicate, knowledge):
    """Return True when ``predicate`` holds for every card still possible under ``knowledge``."""
    return all(predicate(card) for card in get_possible(knowledge))
def may_have_property(predicate, knowledge):
    """Return True when ``predicate`` holds for some card still possible under ``knowledge``."""
    return any(predicate(card) for card in get_possible(knowledge))
def probability(predicate, knowledge):
    """Return the count-weighted share of possible cards that satisfy ``predicate``.

    predicate: callable taking a Card and returning a truth value.
    knowledge: per-color lists of remaining counts per rank.

    Returns 0.0 when ``knowledge`` contains no remaining counts at all,
    instead of dividing by zero.
    """
    num = 0.0
    denom = 0.0
    for col in ALL_COLORS:
        for i, cnt in enumerate(knowledge[col]):
            if predicate(Card(col, i + 1)):
                num += cnt
            denom += cnt
    if denom == 0:
        return 0.0
    return num / denom
def playable(board):
    """Return a one-argument predicate reporting whether a card is playable on ``board``."""
    return lambda card: card.is_playable(board)
def useless(board):
    """Return a one-argument predicate reporting whether a card is useless on ``board``."""
    return lambda card: card.is_useless(board)
def has_rank(rank):
    """Return a one-argument predicate that is true for cards of the given rank."""
    return lambda card: card.rank == rank
def has_color(color):
    """Return a one-argument predicate that is true for cards of the given color."""
    return lambda card: card.color == color
def get_possible(knowledge):
    """List every card identity that ``knowledge`` has not ruled out.

    A (color, rank) pair is included when its remaining count is positive;
    the result is ordered by color, then by rank.
    """
    return [
        Card(col, pos + 1)
        for col in ALL_COLORS
        for pos, cnt in enumerate(knowledge[col])
        if cnt > 0
    ]
def filter_actions(type, actions):
    """Return the actions whose ``type`` attribute equals ``type``, in their original order."""
    return list(filter(lambda act: act.type == type, actions))