-
Notifications
You must be signed in to change notification settings - Fork 1
/
Amslib_String.php
356 lines (323 loc) · 12.2 KB
/
Amslib_String.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
<?php
class Amslib_String
{
    /**
     * method: slugify
     *
     * Converts a string into a url-friendly "slug": every run of characters that is not
     * a letter or a digit is replaced by $replace, the result is transliterated to ASCII
     * (when iconv is available) and lower cased, and finally anything that is still not
     * a word character, a "-" or the replace character is dropped.
     *
     * parameters:
     *  $text    - The text to slugify
     *  $remove  - default "" (empty, nothing), raw character-class content which is added to
     *             the set of characters that are PRESERVED by both passes. WARNING: it is
     *             inserted unescaped into the regex, so a malformed value will break the pattern
     *  $replace - default "-", the character to replace all the non-matching characters with
     *
     * returns:
     *  A string which has been stripped of all the invalid characters, in lower case
     *
     * notes:
     *  - original code from: http://snipplr.com/view/22741/slugify-a-string-in-php/
     *  - modified 01/08/2011: added ability to allow custom regex through the $remove parameter
     *  - if iconv() fails the text is kept un-transliterated; any non-ASCII bytes that remain
     *    are then dropped by the final pass instead of the whole slug being lost
     *
     * todo:
     *  investigate whether the remove unwanted character step should be BEFORE the replace step
     */
    static public function slugify($text,$remove="",$replace="-")
    {
        // replace every run of non letters/digits (and not explicitly allowed) by $replace
        $text = preg_replace("~[^\\pL\d{$remove}]+~u", $replace, $text);
        // do not leave the separator dangling at either end
        $text = trim($text, $replace);

        // transliterate to baseline ASCII, iconv() returns false when it cannot convert
        if (function_exists('iconv')) {
            $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
            if ($ascii !== false) {
                $text = $ascii;
            }
        }

        $text = strtolower($text);

        // remove unwanted characters, but never the separator which was just inserted
        $keep = preg_quote($replace, '~');
        $text = preg_replace("~[^-{$keep}\w{$remove}]+~", '', $text);

        return $text;
    }

    /**
     * method: slugify2
     *
     * A newer version of slugify built on top of self::translit. Everything which is not
     * 0-9, a-z or listed in $extra is replaced by $slug, and when the result contains a "."
     * the slug characters directly in front of the last "." (the extension) are trimmed away.
     *
     * parameters:
     *  $string - The string to slugify
     *  $slug   - The character to use when slugifying the parts together
     *  $extra  - Extra characters you want to allow in the string which normally would be
     *            removed (for example "." to keep a file extension), null means none
     *
     * returns:
     *  The slugified string, in lower case, without leading or trailing slug characters
     *
     * NOTE: code taken from => https://github.com/alixaxel/phunction/blob/master/phunction/Text.php
     */
    public static function slugify2($string, $slug = '-', $extra = null)
    {
        // preg_quote() requires a string, null is deprecated as an argument since PHP 8.1
        $extra = (string)$extra;

        $string = self::translit($string,$extra);
        $string = preg_replace('~[^0-9a-z'.preg_quote($extra, '~').']+~i',$slug, $string);

        // This part will clean up the end of the filename, before the extension
        // But only do it if you find more than one part because there was an extension
        $parts = explode(".",$string);
        if(count($parts) > 1){
            $extension = array_pop($parts);
            $string = rtrim(implode(".",$parts),$slug).".$extension";
        }

        return strtolower(trim($string, $slug));
    }

    /**
     * method: trimString
     *
     * A cheaper, but far less useful version of self::truncateString, does not consider html,
     * it chops where it was told and appends the ending. Lengths are measured in bytes.
     *
     * parameters:
     *  $text   - The string to trim
     *  $length - default 100, the maximum length of the returned string, ending included
     *  $ending - default "...", the ending to append if a string is truncated
     *
     * returns:
     *  A truncated string, or the original string if it was not longer than $length
     */
    static public function trimString($text,$length=100,$ending="...")
    {
        // a string which already fits must be returned untouched
        if (strlen($text) <= $length) {
            return $text;
        }

        // never pass a negative length to substr(), that would cut from the tail instead
        $keep = max(0, $length - strlen($ending));

        return substr($text,0,$keep).$ending;
    }

    /**
     * method: truncateString
     *
     * A more intelligent truncate string method that will cut a string better than just substr()
     *
     * parameters:
     *  $text         - the string to truncate
     *  $length       - default 100, the length required (the ending is counted in this length)
     *  $ending       - default "...", the ending to append if a string is truncated
     *  $exact        - default false. When false the truncated text is cut back to its last
     *                  space so a word is not cut in the middle; when true the text is cut
     *                  exactly at the calculated position
     *  $considerHtml - default true, whether or not to consider HTML tags, so tags are not
     *                  counted towards the length and tags left open are closed after the ending
     *
     * returns:
     *  The original text when it is short enough, otherwise the truncated text with $ending
     *  (and any closing tags when $considerHtml is true) appended
     *
     * notes:
     *  - copied from CakePHP::truncate() to avoid importing the CakePHP namespace
     *  - lengths are measured in bytes (strlen/substr)
     *  - NOTE(review): with $considerHtml and !$exact the last space may sit inside a tag or in
     *    front of an already copied closing tag, in which case the cut can still produce
     *    unbalanced html; later CakePHP versions handle this, this method does not
     */
    static public function truncateString($text, $length = 100, $ending = '...', $exact = false, $considerHtml = true)
    {
        // named (&amp;), decimal (&#38;) and hexadecimal (&#x26;) entities count as one character
        $entity_regex = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
        $open_tags = array();

        if ($considerHtml)
        {
            // if the plain text is shorter than the maximum length, return the whole text
            if (strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
                return $text;
            }

            // splits all html-tags to scanable lines: [1] is an optional tag, [2] the text after it
            preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);

            $total_length = strlen($ending);
            $truncate = '';

            foreach ($lines as $line_matchings)
            {
                // if there is any html-tag in this line, handle it and add it (uncounted) to the output
                if (!empty($line_matchings[1]))
                {
                    if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1]))
                    {
                        // an "empty element" with or without xhtml-conform closing slash: nothing to track
                    } else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
                        // closing tag: opening tags are stored lower cased, so compare lower cased
                        $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                        if ($pos !== false) {
                            unset($open_tags[$pos]);
                        }
                    } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
                        // opening tag: add it to the beginning of the list, so the innermost tag is closed first
                        array_unshift($open_tags, strtolower($tag_matchings[1]));
                    }

                    // add html-tag to $truncate'd text
                    $truncate .= $line_matchings[1];
                }

                // calculate the length of the plain text part of the line; handle entities as one character
                $content_length = strlen(preg_replace($entity_regex, ' ', $line_matchings[2]));

                if ($total_length + $content_length > $length)
                {
                    // the number of characters which are left
                    $left = $length - $total_length;
                    $entities_length = 0;

                    // search for html entities
                    if (preg_match_all($entity_regex, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE))
                    {
                        // calculate the real length of all entities in the legal range
                        foreach ($entities[0] as $entity) {
                            if ($entity[1] + 1 - $entities_length <= $left) {
                                $left--;
                                $entities_length += strlen($entity[0]);
                            } else {
                                // no more characters left
                                break;
                            }
                        }
                    }

                    $truncate .= substr($line_matchings[2], 0, $left + $entities_length);
                    // maximum length is reached, so get off the loop
                    break;
                } else {
                    $truncate .= $line_matchings[2];
                    $total_length += $content_length;
                }

                // if the maximum length is reached, get off the loop
                if ($total_length >= $length) {
                    break;
                }
            }
        } else {
            if (strlen($text) <= $length) {
                return $text;
            } else {
                $truncate = substr($text, 0, $length - strlen($ending));
            }
        }

        // if the words shouldn't be cut in the middle...
        if (!$exact) {
            // ...search the last occurance of a space...
            $spacepos = strrpos($truncate, ' ');
            // strrpos() returns false when there is no space, cutting then would erase everything
            if ($spacepos !== false) {
                // ...and cut the text in this position
                $truncate = substr($truncate, 0, $spacepos);
            }
        }

        // add the defined ending to the text
        $truncate .= $ending;

        if ($considerHtml) {
            // close all unclosed html-tags
            foreach ($open_tags as $tag) {
                $truncate .= '</' . $tag . '>';
            }
        }

        return $truncate;
    }

    /**
     * method: translit
     *
     * Strips the accents from accented characters and replaces every other character which is
     * not 0-9, a-z or listed in $extra by a space, runs of spaces and dashes are collapsed
     * into a single space and the result is trimmed of spaces and dashes.
     *
     * parameters:
     *  $text  - The text to process, expected to be UTF-8
     *  $extra - Extra characters to allow, null means none
     *
     * returns:
     *  The transliterated string (case is preserved)
     *
     * NOTE: code taken from => https://github.com/alixaxel/phunction/blob/master/phunction/Text.php
     */
    static public function translit($text,$extra=null)
    {
        // preg_quote() requires a string, null is deprecated as an argument since PHP 8.1
        $allowed = preg_quote((string)$extra,'~');

        // turn accented characters into entities (e.g. &eacute;) and keep only their base letter
        $text = htmlentities($text, ENT_QUOTES, 'UTF-8');
        $text = preg_replace('~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i', '$1', $text);
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // blank out everything which is not allowed, then collapse runs of spaces and dashes
        $text = preg_replace(array('~[^0-9a-z'.$allowed.']~i', '~[ -]+~'), ' ', $text);

        return trim($text, ' -');
    }

    /**
     * method: reduceSlashes
     *
     * Reduce the consecutive slashes in a string to a single item, /=>/, //=>/, ///=>/, etc
     *
     * params:
     *  $string - The string to process
     *
     * returns:
     *  A string with all the slashes reduced
     *
     * notes:
     *  If the string is a URL beginning with http://, use Amslib_Website::reduceSlashes instead,
     *  this method would also reduce the "//" of the protocol
     */
    static public function reduceSlashes($string)
    {
        return preg_replace('#//+#','/',$string);
    }

    /**
     * method: stripComments
     *
     * Remove html comments, block comments and line comments from a string, strip the leading
     * whitespace of every line and collapse blank lines. It might not be perfect.
     *
     * params:
     *  $string - The string to process
     *
     * returns:
     *  A string without comments, or false (after logging) when $string is not a string
     *
     * notes:
     *  - some of this code originally came from: http://stackoverflow.com/a/1581063/279147
     *  - "\A" (start of subject) is not allowed inside a character class by PCRE2, so it is
     *    written as an alternative next to the character class
     */
    static public function stripComments($string)
    {
        if(!is_string($string)){
            Amslib_Debug::log("Attempting to strip comments from something that is not a string");
            return false;
        }

        // html comments, but not conditional comments such as <!--[if IE]>
        $string = preg_replace('#<!--[^\[<>].*?(?<!!)-->#s', '', $string);

        $regex = array(
            // leading whitespace of every line
            '`^([\t\s]+)`ism' => '',
            // block comments
            '`\/\*(.+?)\*\/`ism' => "",
            // block comments at the start of the subject, or after a newline or semicolon
            '`(\A|[\n;]+)\/\*(.+?)\*\/`ism' => "$1",
            // line comments at the start of the subject, or after whitespace or a semicolon
            // (so the "//" of "http://" is left alone)
            '`(\A|[\n;\s]+)//(.+?)[\n\r]`ism' => "$1\n",
            // blank lines
            '`(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+`ism' => "\n"
        );

        $string = preg_replace(array_keys($regex),array_values($regex),$string);

        return $string;
    }

    /**
     * method: lchop
     *
     * Chop a string to remove everything to the left of the first occurrence of the
     * search token, leaving only what is on the right of the search token
     *
     * parameters:
     *  $str          - The string to search through
     *  $search       - The search token to find
     *  $removeSearch - Currently unused: the search token is always removed from the result
     *
     * returns:
     *  Everything after the first occurrence of $search, or $str unchanged when $search
     *  is empty or cannot be found
     *
     * fixme:
     *  there is a bug here in the amslib power panel has a 500 webserver
     *  error when you return "" or false for not finding a string
     *
     * notes:
     *  - returning false for "not found" would make more sense, but the original string
     *    is returned because of the problem described in the fixme
     *  - the optional $removeSearch behaviour is disabled since it was causing a 500 webserver error
     */
    static public function lchop($str,$search,$removeSearch=false)
    {
        $p = strlen($search) ? strpos($str,$search) : false;

        // TODO: fix the bugs and test this next line to optionally remove the search string instead of doing it by default
        // NOTE: I didnt want to activate this by default in case it broke things I didnt realise
        //if($removeSearch) $p+=strlen($search);

        return ($p) !== false ? substr($str,$p+strlen($search)) : $str;
    }

    /**
     * method: rchop
     *
     * Chop a string to remove everything to the right of the last occurrence of the
     * search token, leaving only what is on the left of the search token
     *
     * parameters:
     *  $str    - The string to search through
     *  $search - The search token to find
     *
     * returns:
     *  Everything before the last occurrence of $search (the token itself is removed),
     *  or $str unchanged when $search is empty or cannot be found
     *
     * fixme:
     *  there is a bug here in the amslib power panel has a 500 webserver
     *  error when you return "" or false for not finding a string
     *
     * notes:
     *  - returning false for "not found" would make more sense, but the original string
     *    is returned because of the problem described in the fixme
     *  - this method has no $removeSearch parameter like lchop, which is inconsistent
     */
    static public function rchop($str,$search)
    {
        $p = strlen($search) ? strrpos($str,$search) : false;

        return ($p) !== false ? substr($str,0,$p) : $str;
    }
}