-
Notifications
You must be signed in to change notification settings - Fork 0
/
TrieStructure.java
181 lines (169 loc) · 4.74 KB
/
TrieStructure.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import java.util.ArrayList;
/**
* My implementation of a Trie Tree. Stores important information
* about the contents of a document and their word indexes. Searches
* can be conducted in near O(md) where m is size of alphabet and d length of
* search string.
*
* @author Chuka Okoye
*/
public class TrieStructure
{
private TrieNode rootNode;
/**
* Simple constructor to initialize our Trie Data Structure with an empty element
* @param nothing
* */
public TrieStructure()
{
//Should only create the empty root node
rootNode = new TrieNode('.',false);
}
/**
* Function responsible for inserting new words into a Trie Data Struture
* @param word, a word to be inserted into Trie Data Structure
* @param occurIndex, the index in the document the word occurs
* @return true if insert successful
*/
public boolean putWord(String word, int occurIndex)
{
if(checkValidity(word))
{
char[] charArray = word.toLowerCase().toCharArray();
ArrayList<TrieNode> tempNodeList = rootNode.childNodes;
boolean inserted;
for(int i=0; i<charArray.length; i++)
{
inserted = false;
while(inserted == false)
{
if(rootNode.childNodes.isEmpty()) //test for first insertion
{
inserted = true;
TrieNode temp;
if(charArray.length == 1) //test for endWord
{
temp = new TrieNode(charArray[0],true);
}
else
{
temp = new TrieNode(charArray[0], false);
}
rootNode.childNodes.add(temp);
tempNodeList = rootNode.childNodes.get(0).childNodes;
}
else
{
int index = getIndexChar(charArray[i],tempNodeList);
inserted = true;
//Two cases, either current char exists already or does not.
if(index != -1)
{
if((charArray.length-1) == i) //test for endWord [REFACTOR]
{
TrieNode pointer = tempNodeList.get(index);
pointer.endWord = true;
}
tempNodeList.get(index).index.add(occurIndex);
tempNodeList = tempNodeList.get(index).childNodes;
}
else
{
if((charArray.length-1) == i) //test for endWord [REFACTOR]
{
TrieNode aNode = new TrieNode(charArray[i],true);
aNode.index.add(occurIndex);
tempNodeList.add(aNode);
tempNodeList = tempNodeList.get(tempNodeList.indexOf(aNode)).childNodes;
}
else
{
TrieNode aNode = new TrieNode(charArray[i], false);
aNode.index.add(occurIndex);
tempNodeList.add(aNode);
tempNodeList = tempNodeList.get(tempNodeList.indexOf(aNode)).childNodes;
}
}
}
}
}
return true;
}
return false;
}
/**
* A function to traverse the Trie tree and retrieve all indexes a word occurs in a document.
* @param word, the word whose indexes in document should be returned
* @return ArrayList<Integer>, a list containing all indexes of specified word else returns null if not exist.
*/
public ArrayList<Integer> getWordIndexes(String word)
{
if(word.equals(""))
return null; //Error check.
ArrayList<TrieNode> tempNodeList = rootNode.childNodes;
char[] charArray = word.toLowerCase().toCharArray();
int index = -1;
for(int i=0; i<charArray.length; i++)
{
index = getIndexChar(charArray[i], tempNodeList);
if(index != -1)
{
if(i != (charArray.length-1))
tempNodeList = tempNodeList.get(index).childNodes;
}
else
return null; //Word simply does not exist in tree
}
try
{
if(tempNodeList.get(index).endWord)
return tempNodeList.get(index).index; //Oleee! found the exact word.
else
return null; //A similar word exists in tree that has same the prefix as search phrase.
}
catch(ArrayIndexOutOfBoundsException e)
{
return null; //Will never occur
}
}
/**
* A function to search through a list of nodes to find a corresponding character
* @param value, value to be checked
* @param nodes, a list whose characters will be checked against
* @return the index of the node containing such character. -1 if not found
*/
private int getIndexChar(char value, ArrayList<TrieNode> nodes)
{
for(int i=0; i<nodes.size(); i++)
{
if(nodes.get(i).character == value)
return i;
}
return -1;
}
/**
* Makes sure only the letters a-z, A-Z and 0-9 exist in the string
* @param value, string to be verified
* @return true if it is a valid string
*/
private boolean checkValidity(String string)
{
char[] myArray = null;
try
{
myArray = string.toCharArray();
}
catch(NullPointerException e)
{
return false;
}
int value;
for(int i=0; i<myArray.length; i++)
{
value = (int)myArray[i];
if(!((value >= 48 && value <= 57) || (value >= 65 && value <= 90) || (value >= 97 && value <= 122)))
return false;
}
return true;
}
}