-
Notifications
You must be signed in to change notification settings - Fork 42
/
Parser.php
172 lines (146 loc) · 4.52 KB
/
Parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
<?php
/**
* Works with a Name object to parse out the parts of a name.
*
* Example usage:
* $parser = new HumanNameParser_Parser("John Q. Smith");
* echo $parser->getLast() . ", " . $parser->getFirst();
* // prints "Smith, John"
*
*
*/
class HumanNameParser_Parser {
    /**
     * The name being parsed; stays null until setName() receives a non-empty value.
     * parse() chops pieces off this object as it goes, so after parsing it
     * appears to hold only whatever was left over (the middle part).
     *
     * @var HumanNameParser_Name|null
     */
    private $name;

    /** @var string Leading initial (e.g. the "J." in "J. Edgar Hoover"), or "" if none. */
    private $leadingInit = "";

    /** @var string First name, or "" before a name has been parsed. */
    private $first = "";

    /** @var string Nickname text found between quotes or parentheses, or "" if none. */
    private $nicknames = "";

    /** @var string Whatever remains once every other part has been removed. */
    private $middle = "";

    /** @var string Last name, including any recognised prefix such as "van der". */
    private $last = "";

    /** @var string Suffix such as "jr" or "iii", or "" if none. */
    private $suffix = "";

    /** @var array Lower-case suffixes recognised at the end of a name. */
    private $suffixes = array('esq','esquire','jr','sr','2','ii','iii','iv', 'v', 'phd');

    /** @var array Lower-case last-name prefixes that are kept attached to the last name. */
    private $prefixes = array('bar','ben','bin','da','dal','de la', 'de', 'del','der','di',
        'ibn','la','le','san','st','ste','van', 'van der', 'van den', 'vel','von');

    /**
     * Constructor.
     *
     * @param mixed $name Either a name as a string or as a Name object. When
     *                    omitted (or otherwise empty) nothing is parsed and
     *                    every part stays an empty string.
     * @throws Exception if a name is given but no first or last name can be found in it.
     */
    public function __construct($name = NULL)
    {
        $this->setName($name);
    }

    /**
     * Sets the name and parses it.
     *
     * Takes a Name object or a simple string (the string is converted into a
     * Name object), resets all previously parsed parts and loads the
     * constituent parts of the new name. An empty value is ignored: the
     * parser keeps whatever state it already had.
     *
     * @param mixed $name Either a name as a string or as a Name object.
     * @throws Exception if no first or last name can be found.
     */
    public function setName($name = NULL)
    {
        if (!$name) {
            return;
        }

        // Accept ready-made Name objects (mostly for testing), including
        // subclasses; anything else is wrapped in a new Name.
        if ($name instanceof HumanNameParser_Name) {
            $this->name = $name;
        } else {
            $this->name = new HumanNameParser_Name($name);
        }

        // Forget the parts of any previously parsed name.
        $this->leadingInit = "";
        $this->first = "";
        $this->nicknames = "";
        $this->middle = "";
        $this->last = "";
        $this->suffix = "";

        $this->parse();
    }

    /**
     * Returns the leading initial.
     *
     * PHP method names are case-insensitive, so the historical spelling
     * getleadingInit() keeps working.
     *
     * @return string
     */
    public function getLeadingInit()
    {
        return $this->leadingInit;
    }

    /** @return string The first name. */
    public function getFirst()
    {
        return $this->first;
    }

    /** @return string The nickname text, without its surrounding quotes/parentheses. */
    public function getNicknames()
    {
        return $this->nicknames;
    }

    /** @return string The middle part of the name. */
    public function getMiddle()
    {
        return $this->middle;
    }

    /** @return string The last name. */
    public function getLast()
    {
        return $this->last;
    }

    /** @return string The suffix. */
    public function getSuffix()
    {
        return $this->suffix;
    }

    /** @return HumanNameParser_Name|null The Name object, or null if no name was ever set. */
    public function getName()
    {
        return $this->name;
    }

    /**
     * Returns all the parts of the name as an array.
     *
     * The parts always come in the order leadingInit, first, nicknames,
     * middle, last, suffix.
     *
     * @param String $arrType 'assoc' (default) for an array keyed by part name;
     *                        'int' (or its alias 'num') for an integer-indexed array.
     * @return array An array of the name-parts
     * @throws Exception if $arrType is not one of the supported values.
     */
    public function getArray($arrType = 'assoc')
    {
        $arr = array(
            'leadingInit' => $this->leadingInit,
            'first'       => $this->first,
            'nicknames'   => $this->nicknames,
            'middle'      => $this->middle,
            'last'        => $this->last,
            'suffix'      => $this->suffix,
        );

        if ($arrType === 'assoc') {
            return $arr;
        }
        // 'num' is accepted because the old error message told callers to use it.
        if ($arrType === 'int' || $arrType === 'num') {
            return array_values($arr);
        }
        throw new Exception("Array type must be associative ('assoc') or integer-indexed ('int' or 'num').");
    }

    /**
     * Parse the name into its constituent parts.
     *
     * Sequentially captures each name-part, working in from the ends and
     * trimming the name string as it goes. The order of the steps matters:
     * each regex requires that the preceding ones have been run and their
     * matches chopped out.
     *
     * @return boolean true on success
     * @throws Exception if no last name or no first name can be found.
     */
    private function parse()
    {
        $suffixes = implode("\.*|", $this->suffixes) . "\.*"; // each suffix gets a "\.*" behind it.
        $prefixes = implode(" |", $this->prefixes) . " "; // each prefix gets a " " behind it.

        // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
        // but you can select a particular parenthesized submatch to be returned.
        // NOTE(review): the patterns carry no modifiers here; whether matching is
        // case-insensitive or Unicode-aware depends on HumanNameParser_Name::chopWithRegex(),
        // which is not visible in this file - confirm before adding modifiers.

        // A space-delimited run wrapped in quotes or parentheses; submatch 2 is the text inside.
        $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /"; // names that starts or end w/ an apostrophe break this
        // Optional commas/spaces followed by a known suffix at the very end; submatch 1 is the suffix.
        $suffixRegex = "/,* *($suffixes)$/";
        // The final token plus any preceding known prefixes or "<word> y " pairs;
        // (?!^) stops the match from starting at the beginning of the string.
        $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
        // One character with optional dots at the start, but only when followed by a space and two letters.
        $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/"; // note the lookahead, which isn't returned or replaced
        // The first run of non-space characters.
        $firstRegex = "/^[^ ]+/";

        // get nickname, if there is one
        $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

        // get suffix, if there is one
        $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

        // get the first initial, if there is one
        $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

        // flip the before-comma and after-comma parts of the name
        $this->name->flip(",");

        // get the last name
        $this->last = $this->name->chopWithRegex($lastRegex, 0);
        if (!$this->last) {
            throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
        }

        // get the first name
        $this->first = $this->name->chopWithRegex($firstRegex, 0);
        if (!$this->first) {
            throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
        }

        // if anything's left, that's the middle name
        $this->middle = $this->name->getStr();
        return true;
    }
}
?>