-
Notifications
You must be signed in to change notification settings - Fork 7
/
skraflpermuter.py
301 lines (257 loc) · 11.3 KB
/
skraflpermuter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
"""
SCRABBLE(tm) rack permutations
Copyright (C) 2023 Miðeind ehf.
Original author: Vilhjálmur Þorsteinsson
This module implements a main class named Tabulator and
a helper class named WordDatabase.
Tabulator takes a SCRABBLE(tm) rack and processes it to find
all valid word permutations within the rack. It can then
be queried for the number of such permutations, the list
of permutations, the highest "plain" score in the list,
and the list of the permutations having the highest such score.
It also generates valid combinations of the
rack with a single additional letter.
WordDatabase can judge whether a particular word is valid
on the board, given a dictionary. It uses a preprocessed word
graph (DAWG) loaded from a text file. The word graph is
implemented in class DawgDictionary in dawgdictionary.py.
Note: SCRABBLE is a registered trademark. This software or its author
are in no way affiliated with or endorsed by the owners or licensees
of the SCRABBLE trademark.
"""
import os
import logging
import time
import sys
from dawgdictionary import Wordbase
from languages import Alphabet
class WordDatabase:
""" Maintains the set of permitted words and judges whether a word is acceptable.
This class loads and maintains a Directed Acyclic Word Graph (DAWG) of valid words.
The DAWG is used to check racks for validity and to find all valid words embedded
within a rack.
The word database is fairly big (several megabytes) so it is important to avoid multiple
instances of it. The Tabulator implementation below makes sure to use a singleton
instance of the WordDatabase class, in the class variable _word_db, across all invocations.
The word graph is loaded from a text file, 'ordalisti.text.dawg', assumed to be in
the 'resources' folder. This file is separately pre-generated using DawgBuilder.run_skrafl()
in dawgbuilder.py.
The graph contains a cleaned-up version of a database originally from bin.arnastofnun.is,
used under license conditions from "Stofnun Árna Magnússonar í íslenskum fræðum".
"""
def __init__(self):
# We maintain the list of permitted words in a DAWG dictionary
# The DAWG is lazily loaded into memory upon first use
self._dawg = None
def _load(self):
""" Load word lists into memory from static preprocessed text files """
if self._dawg is not None:
# Already loaded, nothing to do
return
self._dawg = Wordbase.dawg()
def initialize(self):
""" Force preloading of word lists into memory """
if self._dawg is None:
self._load()
def is_valid_word(self, word):
""" Checks whether a word is found in the list of legal words """
if not word:
return False
if self._dawg is None:
self._load()
assert self._dawg is not None
return self._dawg.find(word)
def __contains__(self, word):
""" Enable simple lookup syntax: "word" in word_db """
return self.is_valid_word(word)
def find_permutations(self, rack):
""" Find all embedded words within a rack """
if not rack:
return None
if self._dawg is None:
self._load()
assert self._dawg is not None
return self._dawg.find_permutations(rack)
def find_matches(self, pattern, sort=True):
""" Find all words that match a pattern """
if not pattern:
return None
if self._dawg is None:
self._load()
assert self._dawg is not None
return self._dawg.find_matches(pattern, sort)
def navigate(self, nav):
""" Use a generic navigator to traverse the graph """
if self._dawg is None:
self._load()
assert self._dawg is not None
self._dawg.navigate(nav)
class Tabulator:
""" Processes and tabulates the possible permutations and combinations
within a given rack.
An instance of this class is passed to the result.html web page
for display using the Jinja2 template mechanism.
"""
# A singleton instance of the WordDatabase class, used by
# all Tabulator instances throughout a server session
_word_db = None
def __init__(self):
self._counter = 0
self._allwords = [] # List of tuples: (word, score)
self._highscore = 0
self._highwords = []
self._combinations = { }
self._rack = ""
self._rack_is_valid = False # True if the rack is itself a valid word
self._pattern = False # True if the result is a pattern match ('=')
if Tabulator._word_db is None:
# The word database will be lazily loaded from file upon first use
Tabulator._word_db = WordDatabase()
def process(self, rack):
""" Generate the data that will be shown to the user on the result page.
This includes a list of permutations of the rack, as well as combinations
of the rack with a single additional letter. High scoring words are also
tabulated. """
# Start with basic hygiene
if not rack:
return False
rack = rack.strip()
if not rack:
return False
# Make sure we reset all state in case we're called multiple times
self._counter = 0
self._allwords = [] # List of tuples: (word, score)
self._highscore = 0
self._highwords = []
self._combinations = { }
self._rack = ""
self._pattern = False
# Do a sanity check on the input by calculating its raw score, thereby
# checking whether all the letters are valid
score = 0
rack_lower = "" # Rack converted to lowercase
wildcards = 0 # Number of wildcard characters
# If the rack starts with an equals sign ('=') we do a pattern match
# instead of a permutation search
if rack[0] == '=':
self._pattern = True
rack = rack[1:]
# Sanitize the rack, converting upper case to lower case and
# catching invalid characters
try:
for c in rack:
ch = c
if ch in Alphabet.upper:
# Uppercase: find corresponding lowercase letter
ch = Alphabet.lowercase(ch)
if ch in '?_*':
# This is one of the allowed wildcard characters
wildcards += 1
ch = '?'
else:
score += Alphabet.scores[ch]
rack_lower += ch
except KeyError:
# A letter in the rack is not valid, even after conversion to lower case
return False
if not self._pattern and (wildcards > 2):
# Too many wildcards in a permutation search - need to constrain result set size
return False
# The rack contains only valid letters
self._rack = rack_lower
# Generate combinations
if not self._pattern and not wildcards:
# If no wildcards given, check combinations with one additional letter
query = self._rack + '?'
# Permute the rack with one additional letter
p = self._word_db.find_permutations(query)
# Check the permutations to find valid words and their scores
if p is not None:
for word in p:
# Only interested in full-length permutations, i.e. with the additional letter
if len(word) == len(query):
# Find out which letter was added
addedletter = Alphabet.string_subtract(word, self._rack)
self._add_combination(addedletter, word)
# Check permutations
# The shortest possible rack to check for permutations is 2 letters
if len(self._rack) < 2:
return True
if self._pattern:
# Use pattern matching
p = self._word_db.find_matches(self._rack, True) # We'd like a sorted result
else:
# Find permutations
p = self._word_db.find_permutations(self._rack)
if p is None:
return True
for word in p:
if len(word) < 2:
# Don't show single letter words
continue
# Calculate the basic score of the word
score = self.score(word)
if wildcards and not self._pattern:
# Complication: Make sure we don't count the score of the wildcard tile(s)
wildchars = Alphabet.string_subtract(word, self._rack)
# What we have left are the wildcard substitutes: subtract'em
score -= self.score(wildchars)
self._add_permutation(word, score)
# Successful
return True
def score(self, word):
""" Calculate the score for a word """
if word is None:
return 0
try:
s = sum(Alphabet.scores[c] for c in word)
except KeyError:
# Word contains an unrecognized letter: return a zero score
s = 0
return s
def _add_permutation(self, word, score):
""" Add a valid permulation to the tabulation result """
self._counter += 1
self._allwords.append((word, score))
if score > self._highscore:
# New high scoring word: note it and start a new list
self._highscore = score
self._highwords = [word]
elif score == self._highscore:
# Equal score to the previous high scorer: append to the list
self._highwords.append(word)
def _add_combination(self, ch, word):
""" Add to a list of legal combinations that are possible if the letter ch is added to the rack """
if ch in self._combinations:
self._combinations[ch].append(word)
else:
self._combinations[ch] = [word]
def rack(self):
""" Returns a display form of the rack that was tabulated """
return ('= ' + self._rack) if self._pattern else self._rack
def count(self):
""" Returns a count of all valid letter permutations in the rack """
return self._counter
def allwords(self):
""" Returns a list of (word, score) tuples for all valid letter permulations in the rack """
return self._allwords
def highscore(self):
"""Returns the highest scoring letter permutation (by plain summation) in the rack"""
return self._highscore
def highwords(self):
""" Returns a list of all letter permutations in the rack having the highest score """
return self._highwords
def combinations(self):
""" Returns a list of the combinations possible with additional letters.
The list contains (ch, wordlist) tuples where ch is the additional letter
and wordlist is a list of legal words that can be formed with that letter. """
lc = list(self._combinations.items())
if lc:
# Sort the combinations properly in alphabetical order before returning them
lc.sort(key = lambda x: Alphabet.order.index(x[0]))
return lc
# No combinations
return None
def is_valid_word(self, word):
""" Checks whether a word is valid """
return Tabulator._word_db.is_valid_word(word)