-
Notifications
You must be signed in to change notification settings - Fork 5.6k
/
FBString.h
2888 lines (2507 loc) · 87.7 KB
/
FBString.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// String type.
#pragma once
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <fmt/format.h>
#include <folly/CPortability.h>
#include <folly/CppAttributes.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Traits.h>
#include <folly/hash/Hash.h>
#include <folly/lang/Assume.h>
#include <folly/lang/CheckedMath.h>
#include <folly/lang/Exception.h>
#include <folly/memory/Malloc.h>
#if FOLLY_CPLUSPLUS >= 202002L
#include <compare>
#endif
FOLLY_PUSH_WARNING
// Ignore shadowing warnings within this file, so includers can use -Wshadow.
FOLLY_GNU_DISABLE_WARNING("-Wshadow")
namespace folly {
// When compiling with ASan, always heap-allocate the string even if
// it would fit in-situ, so that ASan can detect access to the string
// buffer after it has been invalidated (destroyed, resized, etc.).
// Note that this flag doesn't remove support for in-situ strings, as
// that would break ABI-compatibility and wouldn't allow linking code
// compiled with this flag with code compiled without.
#ifdef FOLLY_SANITIZE_ADDRESS
#define FBSTRING_DISABLE_SSO true
#else
#define FBSTRING_DISABLE_SSO false
#endif
namespace fbstring_detail {
template <class InIt, class OutIt>
inline std::pair<InIt, OutIt> copy_n(
InIt b, typename std::iterator_traits<InIt>::difference_type n, OutIt d) {
for (; n != 0; --n, ++b, ++d) {
*d = *b;
}
return std::make_pair(b, d);
}
template <class Pod, class T>
inline void podFill(Pod* b, Pod* e, T c) {
assert(b && e && b <= e);
constexpr auto kUseMemset = sizeof(T) == 1;
if /* constexpr */ (kUseMemset) {
memset(b, c, size_t(e - b));
} else {
auto const ee = b + ((e - b) & ~7u);
for (; b != ee; b += 8) {
b[0] = c;
b[1] = c;
b[2] = c;
b[3] = c;
b[4] = c;
b[5] = c;
b[6] = c;
b[7] = c;
}
// Leftovers
for (; b != e; ++b) {
*b = c;
}
}
}
/*
* Lightly structured memcpy, simplifies copying PODs and introduces
* some asserts. Unfortunately using this function may cause
* measurable overhead (presumably because it adjusts from a begin/end
* convention to a pointer/size convention, so it does some extra
* arithmetic even though the caller might have done the inverse
* adaptation outside).
*/
template <class Pod>
inline void podCopy(const Pod* b, const Pod* e, Pod* d) {
assert(b != nullptr);
assert(e != nullptr);
assert(d != nullptr);
assert(e >= b);
assert(d >= e || d + (e - b) <= b);
memcpy(d, b, (e - b) * sizeof(Pod));
}
/*
* Lightly structured memmove, simplifies copying PODs and introduces
* some asserts
*/
template <class Pod>
inline void podMove(const Pod* b, const Pod* e, Pod* d) {
assert(e >= b);
memmove(d, b, (e - b) * sizeof(*b));
}
} // namespace fbstring_detail
/**
* Defines a special acquisition method for constructing fbstring
* objects. AcquireMallocatedString means that the user passes a
* pointer to a malloc-allocated string that the fbstring object will
* take into custody.
*/
enum class AcquireMallocatedString {};
/*
* fbstring_core_model is a mock-up type that defines all required
* signatures of a fbstring core. The fbstring class itself uses such
* a core object to implement all of the numerous member functions
* required by the standard.
*
* If you want to define a new core, copy the definition below and
* implement the primitives. Then plug the core into basic_fbstring as
* a template argument.
template <class Char>
class fbstring_core_model {
public:
fbstring_core_model();
fbstring_core_model(const fbstring_core_model &);
fbstring_core_model& operator=(const fbstring_core_model &) = delete;
~fbstring_core_model();
// Returns a pointer to string's buffer (currently only contiguous
// strings are supported). The pointer is guaranteed to be valid
// until the next call to a non-const member function.
const Char * data() const;
// Much like data(), except the string is prepared to support
// character-level changes. This call is a signal for
// e.g. reference-counted implementation to fork the data. The
// pointer is guaranteed to be valid until the next call to a
// non-const member function.
Char* mutableData();
// Returns a pointer to string's buffer and guarantees that a
// readable '\0' lies right after the buffer. The pointer is
// guaranteed to be valid until the next call to a non-const member
// function.
const Char * c_str() const;
// Shrinks the string by delta characters. Asserts that delta <=
// size().
void shrink(size_t delta);
// Expands the string by delta characters (i.e. after this call
// size() will report the old size() plus delta) but without
// initializing the expanded region. The expanded region is
// zero-terminated. Returns a pointer to the memory to be
// initialized (the beginning of the expanded portion). The caller
// is expected to fill the expanded area appropriately.
// If expGrowth is true, exponential growth is guaranteed.
// It is not guaranteed not to reallocate even if size() + delta <
// capacity(), so all references to the buffer are invalidated.
Char* expandNoinit(size_t delta, bool expGrowth);
// Expands the string by one character and sets the last character
// to c.
void push_back(Char c);
// Returns the string's size.
size_t size() const;
// Returns the string's capacity, i.e. maximum size that the string
// can grow to without reallocation. Note that for reference counted
// strings that's technically a lie - even assigning characters
// within the existing size would cause a reallocation.
size_t capacity() const;
// Returns true if the data underlying the string is actually shared
// across multiple strings (in a refcounted fashion).
bool isShared() const;
// Makes sure that at least minCapacity characters are available for
// the string without reallocation. For reference-counted strings,
// it should fork the data even if minCapacity < size().
void reserve(size_t minCapacity);
};
*/
/**
* This is the core of the string. The code should work on 32- and
* 64-bit and both big- and little-endianan architectures with any
* Char size.
*
* The storage is selected as follows (assuming we store one-byte
* characters on a 64-bit machine): (a) "small" strings between 0 and
* 23 chars are stored in-situ without allocation (the rightmost byte
* stores the size); (b) "medium" strings from 24 through 254 chars
* are stored in malloc-allocated memory that is copied eagerly; (c)
* "large" strings of 255 chars and above are stored in a similar
* structure as medium arrays, except that the string is
* reference-counted and copied lazily. the reference count is
* allocated right before the character array.
*
* The discriminator between these three strategies sits in two
* bits of the rightmost char of the storage:
* - If neither is set, then the string is small. Its length is represented by
* the lower-order bits on little-endian or the high-order bits on big-endian
* of that rightmost character. The value of these six bits is
* `maxSmallSize - size`, so this quantity must be subtracted from
* `maxSmallSize` to compute the `size` of the string (see `smallSize()`).
* This scheme ensures that when `size == `maxSmallSize`, the last byte in the
* storage is \0. This way, storage will be a null-terminated sequence of
* bytes, even if all 23 bytes of data are used on a 64-bit architecture.
* This enables `c_str()` and `data()` to simply return a pointer to the
* storage.
*
* - If the MSb is set, the string is medium width.
*
* - If the second MSb is set, then the string is large. On little-endian,
* these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on
* big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian
* and big-endian fbstring_core equivalent with merely different ops used
* to extract capacity/category.
*/
template <class Char>
class fbstring_core {
public:
fbstring_core() noexcept { reset(); }
fbstring_core(const fbstring_core& rhs) {
assert(&rhs != this);
switch (rhs.category()) {
case Category::isSmall:
copySmall(rhs);
break;
case Category::isMedium:
copyMedium(rhs);
break;
case Category::isLarge:
copyLarge(rhs);
break;
default:
folly::assume_unreachable();
}
assert(size() == rhs.size());
assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
}
fbstring_core& operator=(const fbstring_core& rhs) = delete;
fbstring_core(fbstring_core&& goner) noexcept {
// Take goner's guts
ml_ = goner.ml_;
// Clean goner's carcass
goner.reset();
}
fbstring_core(
const Char* const data,
const size_t size,
bool disableSSO = FBSTRING_DISABLE_SSO) {
if (!disableSSO && size <= maxSmallSize) {
initSmall(data, size);
} else if (size <= maxMediumSize) {
initMedium(data, size);
} else {
initLarge(data, size);
}
assert(this->size() == size);
assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0);
}
~fbstring_core() noexcept {
if (category() == Category::isSmall) {
return;
}
destroyMediumLarge();
}
// Snatches a previously mallocated string. The parameter "size"
// is the size of the string, and the parameter "allocatedSize"
// is the size of the mallocated block. The string must be
// \0-terminated, so allocatedSize >= size + 1 and data[size] == '\0'.
//
// So if you want a 2-character string, pass malloc(3) as "data",
// pass 2 as "size", and pass 3 as "allocatedSize".
fbstring_core(
Char* const data,
const size_t size,
const size_t allocatedSize,
AcquireMallocatedString) {
if (size > 0) {
assert(allocatedSize >= size + 1);
assert(data[size] == '\0');
// Use the medium string storage
ml_.data_ = data;
ml_.size_ = size;
// Don't forget about null terminator
ml_.setCapacity(allocatedSize - 1, Category::isMedium);
} else {
// No need for the memory
free(data);
reset();
}
}
// swap below doesn't test whether &rhs == this (and instead
// potentially does extra work) on the premise that the rarity of
// that situation actually makes the check more expensive than is
// worth.
void swap(fbstring_core& rhs) {
auto const t = ml_;
ml_ = rhs.ml_;
rhs.ml_ = t;
}
// In C++11 data() and c_str() are 100% equivalent.
const Char* data() const { return c_str(); }
Char* data() { return c_str(); }
Char* mutableData() {
switch (category()) {
case Category::isSmall:
return small_;
case Category::isMedium:
return ml_.data_;
case Category::isLarge:
return mutableDataLarge();
}
folly::assume_unreachable();
}
const Char* c_str() const {
const Char* ptr = ml_.data_;
// With this syntax, GCC and Clang generate a CMOV instead of a branch.
ptr = (category() == Category::isSmall) ? small_ : ptr;
return ptr;
}
void shrink(const size_t delta) {
if (category() == Category::isSmall) {
shrinkSmall(delta);
} else if (
category() == Category::isMedium || RefCounted::refs(ml_.data_) == 1) {
shrinkMedium(delta);
} else {
shrinkLarge(delta);
}
}
FOLLY_NOINLINE
void reserve(size_t minCapacity, bool disableSSO = FBSTRING_DISABLE_SSO) {
FOLLY_PUSH_WARNING
FOLLY_CLANG_DISABLE_WARNING("-Wcovered-switch-default")
switch (category()) {
case Category::isSmall:
reserveSmall(minCapacity, disableSSO);
break;
case Category::isMedium:
reserveMedium(minCapacity);
break;
case Category::isLarge:
reserveLarge(minCapacity);
break;
default:
folly::assume_unreachable();
}
FOLLY_POP_WARNING
assert(capacity() >= minCapacity);
}
Char* expandNoinit(
const size_t delta,
bool expGrowth = false,
bool disableSSO = FBSTRING_DISABLE_SSO);
void push_back(Char c) { *expandNoinit(1, /* expGrowth = */ true) = c; }
size_t size() const {
size_t ret = ml_.size_;
if /* constexpr */ (kIsLittleEndian) {
// We can save a couple instructions, because the category is
// small iff the last char, as unsigned, is <= maxSmallSize.
typedef typename std::make_unsigned<Char>::type UChar;
auto maybeSmallSize = size_t(maxSmallSize) -
size_t(static_cast<UChar>(small_[maxSmallSize]));
// With this syntax, GCC and Clang generate a CMOV instead of a branch.
ret =
(static_cast<ptrdiff_t>(maybeSmallSize) >= 0) ? maybeSmallSize : ret;
} else {
ret = (category() == Category::isSmall) ? smallSize() : ret;
}
return ret;
}
size_t capacity() const {
FOLLY_PUSH_WARNING
FOLLY_CLANG_DISABLE_WARNING("-Wcovered-switch-default")
switch (category()) {
case Category::isSmall:
return maxSmallSize;
case Category::isLarge:
// For large-sized strings, a multi-referenced chunk has no
// available capacity. This is because any attempt to append
// data would trigger a new allocation.
if (RefCounted::refs(ml_.data_) > 1) {
return ml_.size_;
}
break;
case Category::isMedium:
default:
break;
}
FOLLY_POP_WARNING
return ml_.capacity();
}
bool isShared() const {
return category() == Category::isLarge && RefCounted::refs(ml_.data_) > 1;
}
private:
Char* c_str() {
Char* ptr = ml_.data_;
// With this syntax, GCC and Clang generate a CMOV instead of a branch.
ptr = (category() == Category::isSmall) ? small_ : ptr;
return ptr;
}
void reset() { setSmallSize(0); }
FOLLY_NOINLINE void destroyMediumLarge() noexcept {
auto const c = category();
assert(c != Category::isSmall);
if (c == Category::isMedium) {
free(ml_.data_);
} else {
RefCounted::decrementRefs(ml_.data_);
}
}
struct RefCounted {
std::atomic<size_t> refCount_;
Char data_[1];
constexpr static size_t getDataOffset() {
return offsetof(RefCounted, data_);
}
static RefCounted* fromData(Char* p) {
return static_cast<RefCounted*>(static_cast<void*>(
static_cast<unsigned char*>(static_cast<void*>(p)) -
getDataOffset()));
}
static size_t refs(Char* p) {
return fromData(p)->refCount_.load(std::memory_order_acquire);
}
static void incrementRefs(Char* p) {
fromData(p)->refCount_.fetch_add(1, std::memory_order_acq_rel);
}
static void decrementRefs(Char* p) {
auto const dis = fromData(p);
size_t oldcnt = dis->refCount_.fetch_sub(1, std::memory_order_acq_rel);
assert(oldcnt > 0);
if (oldcnt == 1) {
::free(dis);
}
}
static RefCounted* create(size_t* size) {
size_t capacityBytes;
if (!folly::checked_add(&capacityBytes, *size, size_t(1))) {
throw_exception(std::length_error(""));
}
if (!folly::checked_muladd(
&capacityBytes, capacityBytes, sizeof(Char), getDataOffset())) {
throw_exception(std::length_error(""));
}
const size_t allocSize = goodMallocSize(capacityBytes);
auto result = static_cast<RefCounted*>(checkedMalloc(allocSize));
result->refCount_.store(1, std::memory_order_release);
*size = (allocSize - getDataOffset()) / sizeof(Char) - 1;
return result;
}
static RefCounted* create(const Char* data, size_t* size) {
const size_t effectiveSize = *size;
auto result = create(size);
if (FOLLY_LIKELY(effectiveSize > 0)) {
fbstring_detail::podCopy(data, data + effectiveSize, result->data_);
}
return result;
}
static RefCounted* reallocate(
Char* const data,
const size_t currentSize,
const size_t currentCapacity,
size_t* newCapacity) {
assert(*newCapacity > 0 && *newCapacity > currentSize);
size_t capacityBytes;
if (!folly::checked_add(&capacityBytes, *newCapacity, size_t(1))) {
throw_exception(std::length_error(""));
}
if (!folly::checked_muladd(
&capacityBytes, capacityBytes, sizeof(Char), getDataOffset())) {
throw_exception(std::length_error(""));
}
const size_t allocNewCapacity = goodMallocSize(capacityBytes);
auto const dis = fromData(data);
assert(dis->refCount_.load(std::memory_order_acquire) == 1);
auto result = static_cast<RefCounted*>(smartRealloc(
dis,
getDataOffset() + (currentSize + 1) * sizeof(Char),
getDataOffset() + (currentCapacity + 1) * sizeof(Char),
allocNewCapacity));
assert(result->refCount_.load(std::memory_order_acquire) == 1);
*newCapacity = (allocNewCapacity - getDataOffset()) / sizeof(Char) - 1;
return result;
}
};
typedef uint8_t category_type;
enum class Category : category_type {
isSmall = 0,
isMedium = kIsLittleEndian ? 0x80 : 0x2,
isLarge = kIsLittleEndian ? 0x40 : 0x1,
};
Category category() const {
// works for both big-endian and little-endian
return static_cast<Category>(bytes_[lastChar] & categoryExtractMask);
}
struct MediumLarge {
Char* data_;
size_t size_;
size_t capacity_;
size_t capacity() const {
return kIsLittleEndian ? capacity_ & capacityExtractMask : capacity_ >> 2;
}
void setCapacity(size_t cap, Category cat) {
capacity_ = kIsLittleEndian
? cap | (static_cast<size_t>(cat) << kCategoryShift)
: (cap << 2) | static_cast<size_t>(cat);
}
};
union {
uint8_t bytes_[sizeof(MediumLarge)]; // For accessing the last byte.
Char small_[sizeof(MediumLarge) / sizeof(Char)];
MediumLarge ml_;
};
constexpr static size_t lastChar = sizeof(MediumLarge) - 1;
constexpr static size_t maxSmallSize = lastChar / sizeof(Char);
constexpr static size_t maxMediumSize = 254 / sizeof(Char);
constexpr static uint8_t categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3;
constexpr static size_t kCategoryShift = (sizeof(size_t) - 1) * 8;
constexpr static size_t capacityExtractMask = kIsLittleEndian
? ~(size_t(categoryExtractMask) << kCategoryShift)
: 0x0 /* unused */;
static_assert(
!(sizeof(MediumLarge) % sizeof(Char)),
"Corrupt memory layout for fbstring.");
size_t smallSize() const {
assert(category() == Category::isSmall);
constexpr auto shift = kIsLittleEndian ? 0 : 2;
auto smallShifted = static_cast<size_t>(small_[maxSmallSize]) >> shift;
assert(static_cast<size_t>(maxSmallSize) >= smallShifted);
return static_cast<size_t>(maxSmallSize) - smallShifted;
}
void setSmallSize(size_t s) {
// Warning: this should work with uninitialized strings too,
// so don't assume anything about the previous value of
// small_[maxSmallSize].
assert(s <= maxSmallSize);
constexpr auto shift = kIsLittleEndian ? 0 : 2;
small_[maxSmallSize] = char((maxSmallSize - s) << shift);
small_[s] = '\0';
assert(category() == Category::isSmall && size() == s);
}
void copySmall(const fbstring_core&);
void copyMedium(const fbstring_core&);
void copyLarge(const fbstring_core&);
void initSmall(const Char* data, size_t size);
void initMedium(const Char* data, size_t size);
void initLarge(const Char* data, size_t size);
void reserveSmall(size_t minCapacity, bool disableSSO);
void reserveMedium(size_t minCapacity);
void reserveLarge(size_t minCapacity);
void shrinkSmall(size_t delta);
void shrinkMedium(size_t delta);
void shrinkLarge(size_t delta);
void unshare(size_t minCapacity = 0);
Char* mutableDataLarge();
};
template <class Char>
inline void fbstring_core<Char>::copySmall(const fbstring_core& rhs) {
static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure");
static_assert(
offsetof(MediumLarge, size_) == sizeof(ml_.data_),
"fbstring layout failure");
static_assert(
offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_),
"fbstring layout failure");
// Just write the whole thing, don't look at details. In
// particular we need to copy capacity anyway because we want
// to set the size (don't forget that the last character,
// which stores a short string's length, is shared with the
// ml_.capacity field).
ml_ = rhs.ml_;
assert(category() == Category::isSmall && this->size() == rhs.size());
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::copyMedium(const fbstring_core& rhs) {
// Medium strings are copied eagerly. Don't forget to allocate
// one extra Char for the null terminator.
auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char));
ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
// Also copies terminator.
fbstring_detail::podCopy(
rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_);
ml_.size_ = rhs.ml_.size_;
ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
assert(category() == Category::isMedium);
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::copyLarge(const fbstring_core& rhs) {
// Large strings are just refcounted
ml_ = rhs.ml_;
RefCounted::incrementRefs(ml_.data_);
assert(category() == Category::isLarge && size() == rhs.size());
}
// Small strings are bitblitted
template <class Char>
inline void fbstring_core<Char>::initSmall(
const Char* const data, const size_t size) {
// Layout is: Char* data_, size_t size_, size_t capacity_
static_assert(
sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t),
"fbstring has unexpected size");
static_assert(
sizeof(Char*) == sizeof(size_t), "fbstring size assumption violation");
// sizeof(size_t) must be a power of 2
static_assert(
(sizeof(size_t) & (sizeof(size_t) - 1)) == 0,
"fbstring size assumption violation");
// If data is aligned, use fast word-wise copying. Otherwise,
// use conservative memcpy.
// The word-wise path reads bytes which are outside the range of
// the string, and makes ASan unhappy, so we disable it when
// compiling with ASan.
#ifndef FOLLY_SANITIZE_ADDRESS
if ((reinterpret_cast<size_t>(data) & (sizeof(size_t) - 1)) == 0) {
const size_t byteSize = size * sizeof(Char);
constexpr size_t wordWidth = sizeof(size_t);
switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words.
case 3:
ml_.capacity_ = reinterpret_cast<const size_t*>(data)[2];
[[fallthrough]];
case 2:
ml_.size_ = reinterpret_cast<const size_t*>(data)[1];
[[fallthrough]];
case 1:
ml_.data_ = *reinterpret_cast<Char**>(const_cast<Char*>(data));
[[fallthrough]];
case 0:
break;
}
} else
#endif
{
if (size != 0) {
fbstring_detail::podCopy(data, data + size, small_);
}
}
setSmallSize(size);
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::initMedium(
const Char* const data, const size_t size) {
// Medium strings are allocated normally. Don't forget to
// allocate one extra Char for the terminating null.
auto const allocSize = goodMallocSize((1 + size) * sizeof(Char));
ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
if (FOLLY_LIKELY(size > 0)) {
fbstring_detail::podCopy(data, data + size, ml_.data_);
}
ml_.size_ = size;
ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
ml_.data_[size] = '\0';
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::initLarge(
const Char* const data, const size_t size) {
// Large strings are allocated differently
size_t effectiveCapacity = size;
auto const newRC = RefCounted::create(data, &effectiveCapacity);
ml_.data_ = newRC->data_;
ml_.size_ = size;
ml_.setCapacity(effectiveCapacity, Category::isLarge);
ml_.data_[size] = '\0';
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::unshare(size_t minCapacity) {
assert(category() == Category::isLarge);
size_t effectiveCapacity = std::max(minCapacity, ml_.capacity());
auto const newRC = RefCounted::create(&effectiveCapacity);
// If this fails, someone placed the wrong capacity in an
// fbstring.
assert(effectiveCapacity >= ml_.capacity());
// Also copies terminator.
fbstring_detail::podCopy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_);
RefCounted::decrementRefs(ml_.data_);
ml_.data_ = newRC->data_;
ml_.setCapacity(effectiveCapacity, Category::isLarge);
// size_ remains unchanged.
}
template <class Char>
inline Char* fbstring_core<Char>::mutableDataLarge() {
assert(category() == Category::isLarge);
if (RefCounted::refs(ml_.data_) > 1) { // Ensure unique.
unshare();
}
return ml_.data_;
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::reserveLarge(size_t minCapacity) {
assert(category() == Category::isLarge);
if (RefCounted::refs(ml_.data_) > 1) { // Ensure unique
// We must make it unique regardless; in-place reallocation is
// useless if the string is shared. In order to not surprise
// people, reserve the new block at current capacity or
// more. That way, a string's capacity never shrinks after a
// call to reserve.
unshare(minCapacity);
} else {
// String is not shared, so let's try to realloc (if needed)
if (minCapacity > ml_.capacity()) {
// Asking for more memory
auto const newRC = RefCounted::reallocate(
ml_.data_, ml_.size_, ml_.capacity(), &minCapacity);
ml_.data_ = newRC->data_;
ml_.setCapacity(minCapacity, Category::isLarge);
}
assert(capacity() >= minCapacity);
}
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::reserveMedium(
const size_t minCapacity) {
assert(category() == Category::isMedium);
// String is not shared
if (minCapacity <= ml_.capacity()) {
return; // nothing to do, there's enough room
}
if (minCapacity <= maxMediumSize) {
// Keep the string at medium size. Don't forget to allocate
// one extra Char for the terminating null.
size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char));
// Also copies terminator.
ml_.data_ = static_cast<Char*>(smartRealloc(
ml_.data_,
(ml_.size_ + 1) * sizeof(Char),
(ml_.capacity() + 1) * sizeof(Char),
capacityBytes));
ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium);
} else {
// Conversion from medium to large string
fbstring_core nascent;
// Will recurse to another branch of this function
nascent.reserve(minCapacity);
nascent.ml_.size_ = ml_.size_;
// Also copies terminator.
fbstring_detail::podCopy(
ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_);
nascent.swap(*this);
assert(capacity() >= minCapacity);
}
}
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::reserveSmall(
size_t minCapacity, const bool disableSSO) {
assert(category() == Category::isSmall);
if (!disableSSO && minCapacity <= maxSmallSize) {
// small
// Nothing to do, everything stays put
} else if (minCapacity <= maxMediumSize) {
// medium
// Don't forget to allocate one extra Char for the terminating null
auto const allocSizeBytes =
goodMallocSize((1 + minCapacity) * sizeof(Char));
auto const pData = static_cast<Char*>(checkedMalloc(allocSizeBytes));
auto const size = smallSize();
// Also copies terminator.
fbstring_detail::podCopy(small_, small_ + size + 1, pData);
ml_.data_ = pData;
ml_.size_ = size;
ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium);
} else {
// large
auto const newRC = RefCounted::create(&minCapacity);
auto const size = smallSize();
// Also copies terminator.
fbstring_detail::podCopy(small_, small_ + size + 1, newRC->data_);
ml_.data_ = newRC->data_;
ml_.size_ = size;
ml_.setCapacity(minCapacity, Category::isLarge);
assert(capacity() >= minCapacity);
}
}
template <class Char>
inline Char* fbstring_core<Char>::expandNoinit(
const size_t delta,
bool expGrowth, /* = false */
bool disableSSO /* = FBSTRING_DISABLE_SSO */) {
// Strategy is simple: make room, then change size
assert(capacity() >= size());
size_t sz, newSz;
if (category() == Category::isSmall) {
sz = smallSize();
newSz = sz + delta;
if (!disableSSO && FOLLY_LIKELY(newSz <= maxSmallSize)) {
setSmallSize(newSz);
return small_ + sz;
}
reserveSmall(
expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz, disableSSO);
} else {
sz = ml_.size_;
newSz = sz + delta;
if (FOLLY_UNLIKELY(newSz > capacity())) {
// ensures not shared
reserve(expGrowth ? std::max(newSz, 1 + capacity() * 3 / 2) : newSz);
}
}
assert(capacity() >= newSz);
// Category can't be small - we took care of that above
assert(category() == Category::isMedium || category() == Category::isLarge);
ml_.size_ = newSz;
ml_.data_[newSz] = '\0';
assert(size() == newSz);
return ml_.data_ + sz;
}
template <class Char>
inline void fbstring_core<Char>::shrinkSmall(const size_t delta) {
// Check for underflow
assert(delta <= smallSize());
setSmallSize(smallSize() - delta);
}
template <class Char>
inline void fbstring_core<Char>::shrinkMedium(const size_t delta) {
// Medium strings and unique large strings need no special
// handling.
assert(ml_.size_ >= delta);
ml_.size_ -= delta;
ml_.data_[ml_.size_] = '\0';
}
template <class Char>
inline void fbstring_core<Char>::shrinkLarge(const size_t delta) {
assert(ml_.size_ >= delta);
// Shared large string, must make unique. This is because of the
// durn terminator must be written, which may trample the shared
// data.
if (delta) {
fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this);
}
// No need to write the terminator.
}
/**
* Dummy fbstring core that uses an actual std::string. This doesn't
* make any sense - it's just for testing purposes.
*/
template <class Char>
class dummy_fbstring_core {
public:
dummy_fbstring_core() {}
dummy_fbstring_core(const dummy_fbstring_core& another)
: backend_(another.backend_) {}
dummy_fbstring_core(const Char* s, size_t n) : backend_(s, n) {}
void swap(dummy_fbstring_core& rhs) { backend_.swap(rhs.backend_); }
const Char* data() const { return backend_.data(); }
Char* mutableData() { return const_cast<Char*>(backend_.data()); }
void shrink(size_t delta) {
assert(delta <= size());
backend_.resize(size() - delta);
}
Char* expandNoinit(size_t delta) {
auto const sz = size();
backend_.resize(size() + delta);
return backend_.data() + sz;
}
void push_back(Char c) { backend_.push_back(c); }
size_t size() const { return backend_.size(); }
size_t capacity() const { return backend_.capacity(); }
bool isShared() const { return false; }
void reserve(size_t minCapacity) { backend_.reserve(minCapacity); }
private:
std::basic_string<Char> backend_;
};
/**
* This is the basic_string replacement. For conformity,
* basic_fbstring takes the same template parameters, plus the last
* one which is the core.
*/
template <
typename E,
class T = std::char_traits<E>,
class A = std::allocator<E>,
class Storage = fbstring_core<E>>
class basic_fbstring {
static_assert(
std::is_same<A, std::allocator<E>>::value,
"fbstring ignores custom allocators");
template <typename Ex, typename... Args>
FOLLY_ALWAYS_INLINE static void enforce(bool condition, Args&&... args) {
if (!condition) {
throw_exception<Ex>(static_cast<Args&&>(args)...);
}
}
bool isSane() const {
return begin() <= end() && empty() == (size() == 0) &&
empty() == (begin() == end()) && size() <= max_size() &&
capacity() <= max_size() && size() <= capacity() &&
begin()[size()] == '\0';
}
struct Invariant {
Invariant& operator=(const Invariant&) = delete;
explicit Invariant(const basic_fbstring& s) noexcept : s_(s) {
assert(s_.isSane());
}
~Invariant() noexcept { assert(s_.isSane()); }
private:
const basic_fbstring& s_;
};
public:
// types
typedef T traits_type;
typedef typename traits_type::char_type value_type;
typedef A allocator_type;
typedef typename std::allocator_traits<A>::size_type size_type;
typedef typename std::allocator_traits<A>::difference_type difference_type;