-
Notifications
You must be signed in to change notification settings - Fork 0
/
bitalphabeta.py
208 lines (188 loc) · 6.84 KB
/
bitalphabeta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tic-tac-toe using minimax algorithm with alpha-beta pruning and optionlly use
transposition table and heuristic improvement. Bitboard representation is used.
"""
import random
import sys
import itertools
import gmpy
PLAYERS = [1, -1] # maximizer == 1
COORDS = [(r, c) for r in range(3) for c in range(3)]
COUNT = 0
def symbol(code):
"""Return the symbol of player"""
assert code in PLAYERS
return "X" if code == 1 else "O"
def grouper(iterable, n, fillvalue=None):
# function copied from Python doc, itertools module
args = [iter(iterable)] * n
return itertools.zip_longest(*args, fillvalue=fillvalue)
class Board:
"""bit-vector based tic-tac-toe board"""
def __init__(self, board=0):
self.board = board
def mask(self, row, col, player):
"""Produce the bitmask for row and col
The 18-bit vector is row-major, with matrix cell (0,0) the MSB. And the
higher 9-bit is for 1 (X) and lower 9-bit is for -1 (O)
Args:
row, col: integers from 0 to 2 inclusive
"""
offset = 3*(2-row) + (2-col)
if player == 1:
offset += 9
return 1 << offset
def place(self, row, col, player):
"""produce a new board with row and col set to a symbol. Return None if
some symbol already set.
Args:
what: either +1 or -1
"""
assert player in PLAYERS
mask = self.mask(row, col, player)
othermask = self.mask(row, col, -player)
if (mask | othermask) & self.board:
return None # something already on this position
return Board(self.board | mask)
def __repr__(self):
def emit():
omask = 1 << 8
xmask = omask << 9
while omask: # until the mask becomes zero
yield "O" if self.board & omask else "X" if self.board & xmask else " "
omask >>= 1
xmask >>= 1
separator = "\n---+---+---\n "
return " " + separator.join(" | ".join(g) for g in grouper(emit(), 3))
def spaces(self):
"""tell how many empty spots on the board"""
# alternative if no gmpy: bit(self.board).count("1")
return 9 - gmpy.popcount(self.board)
masks = (0b000000111, 0b000111000, 0b111000000, # rows
0b001001001, 0b010010010, 0b100100100, # cols
0b100010001, 0b001010100 # diags
)
def won(self):
"""check winner. Return the winner (+1 or -1) or None"""
shifted = self.board >> 9
for mask in self.masks:
if self.board & mask == mask:
return -1
if shifted & mask == mask:
return 1
def simple_evaluate(board):
"""simple evaluator: +10, -10 for someone won, 0 for tie. None otherwise"""
winner = board.won()
if winner == 1:
return 10
elif winner == -1:
return -10
if not board.spaces():
return 0
def heuristic_evaluate(board):
"""heuristic evaluation <http://www.ntu.edu.sg/home/ehchua/programming/java/javagame_tictactoe_ai.html>"""
score = 0
for mask in Board.masks:
# 3-in-a-row == score 100
# 2-in-a-row == score 10
# 1-in-a-row == score 1
# 0-in-a-row, or mixed entries == score 0 (no chase for either to win)
# X == positive, O == negative
oboard = board.board
xboard = oboard >> 9
countx = gmpy.popcount(xboard & mask)
counto = gmpy.popcount(oboard & mask)
if countx == 0:
score -= int(10**(counto-1))
elif counto == 0:
score += int(10**(countx-1))
return score
evaluate = simple_evaluate
CACHE = {}
def simple_minimax(board, player):
"""player to move one step on the board, find the minimax (best of the worse case) score"""
# check cache for quick return
if (board.board, player) in CACHE:
return CACHE[(board.board, player)]
global COUNT
COUNT += 1
assert player in PLAYERS
opponent = -player
value = evaluate(board)
if value is not None:
return value # exact score of the board
# possible opponent moves: The worse case scores in different options
candscores = [simple_minimax(b, opponent) for b in [board.place(r, c, player) for r, c in COORDS] if b]
# evaluate the best of worse case scores
if player == 1:
value = max(candscores)
else:
value = min(candscores)
# save into cache
CACHE[(board.board, player)] = value
return value
def alphabeta(board, player, alpha=-float("inf"), beta=float("inf")):
"""minimax with alpha-beta pruning. It implies that we expect the score
should between lowerbound alpha and upperbound beta to be useful
"""
global COUNT
COUNT += 1
assert player in PLAYERS
opponent = -player
value = evaluate(board)
if value is not None:
return value # exact score of the board (terminal nodes)
# minimax search with alpha-beta pruning
children = filter(None, [board.place(r, c, player) for r, c in COORDS])
if "Heuristic improvement" == False:
# sort by a heuristic function to hint for earlier cut-off
children = sorted(children, key=heuristic_evaluate, reverse=True)
if player == 1: # player is maximizer
value = -float("inf")
for child in children:
value = max(value, alphabeta(child, opponent, alpha, beta))
alpha = max(alpha, value)
if alpha >= beta:
break # beta cut-off
else: # player is minimizer
value = float("inf")
for child in children:
value = min(value, alphabeta(child, opponent, alpha, beta))
beta = min(beta, value)
if alpha >= beta:
break # alpha cut-off
return value
minimax = simple_minimax
def play():
"auto play tic-tac-toe"
global COUNT
minimizer = True
game = Board()
# loop until the game is done
while not game.won():
player = PLAYERS[minimizer]
opponent = PLAYERS[not minimizer]
COUNT = 0
candidates = [(b, minimax(b, opponent)) for b in [game.place(r, c, player) for r, c in COORDS] if b]
if not candidates:
break
random.shuffle(candidates)
# find best move: optimizing the worse case score
if player == 1:
game = max(candidates, key=lambda pair: pair[1])[0]
else:
game = min(candidates, key=lambda pair: pair[1])[0]
# print board and switch
minimizer = not minimizer
print("\n%s move after %d search steps:" % (symbol(player), COUNT))
print(game)
# show result
winner = game.won()
if not winner:
print("\nTied")
else:
print("\n%s has won" % symbol(winner))
if __name__ == "__main__":
random.seed(int(sys.argv[1]))
play()