"""
The standard dataloader for BRepNet.
For each BRep model the following tensors are created.
Xf - The face features
A tensor of size [ num_faces x num_face_features ]
These correspond to the feature data extracted from faces.
See BRepNetExtractor.extract_face_features_from_body() in
pipeline/extract_brepnet_data_from_step.py for details of how the
values are computed.
The values will be standardized based on information from the
dataset file.
The faces are not in the same order as they are in Open Cascade.
For a batch of data you must use
Xf[split_batch[solid_index]["face_indices"]]
to restore these to their original order.
Xe - The edge features
A tensor of size [ num_edges x num_edge_features ]
See BRepNetExtractor.extract_edge_features_from_body() in
pipeline/extract_brepnet_data_from_step.py for details of how the
values are computed.
Xc - The coedge features
A tensor of size [ num_coedges x num_coedge_features ]
See BRepNetExtractor.extract_coedge_features_from_body() in
pipeline/extract_brepnet_data_from_step.py for details of how the
values are computed.
Kf - The face kernel tensor
This is an index tensor of size [ num_coedges x num_faces_in_kernel ]
For every coedge in the model the kernel will include some number of
nearby faces whose hidden states will be combined in the convolution.
This tensor contains the indices of these faces as they are in the
Xf array.
Ke - The edge kernel tensor
This is an index tensor of size [ num_coedges x num_edges_in_kernel ]
For every coedge in the model the kernel will include some number of
nearby edges whose hidden states will be combined in the convolution.
This tensor contains the indices of these edges.
Kc - The coedge kernel tensor
This is an index tensor of size [ num_coedges x num_coedges_in_kernel ]
For every coedge in the model the kernel will include some number of
nearby coedges whose hidden states will be combined in the convolution.
This tensor contains the indices of these coedges.
Ce - The coedges of edges tensor
This is an index tensor of size [ num_edges x 2]
For every edge in the model, this tensor contains the indices
of its two child coedges.
The tensor is required to perform pooling of coedge hidden
states onto the parent edges
Cf - The coedges of "small" faces tensor
There can be an arbitrary number of coedges around a single face.
When we perform pooling of the coedge hidden states into faces we
need to know which coedges are in the loops around each face.
This information is split into two sets of tensors. For faces with
at most max_coedges_per_face coedges, the indices are
written into the Cf index tensor with size
[ num_small_faces x max_coedges_per_face ]
A special "padding" index, equal to the number of coedges in
the model, is used to fill the rows for faces which have fewer than
max_coedges_per_face coedges.
The faces in the model get re-ordered so that the faces with at most
max_coedges_per_face coedges come before the other faces.
Csf - The coedges of "large" faces
For faces which have more than max_coedges_per_face coedges around
them the indices are stored in an array
Csf = [
Csf.size() = [ num_coedges_in_face_1 ],
Csf.size() = [ num_coedges_in_face_2 ],
...
]
In find_max_feature_vectors_for_each_face() in models/brepnet.py,
the Cf and Csf index tensors are used for the max pooling operation.
Notice that the re-ordering of the faces in Xf and Kf allows the new
hidden states to be efficiently computed by concatenating tensors.
labels - These are the segment indices for each face
Notice that these are also re-ordered as described above.
Use labels[split_batch[solid_index]["face_indices"]] to extract
the labels in the order of the faces in Open Cascade
file_stem - This tells you the stem of the filename of each solid in the
batch
Splitting batches
BRepNet processes multiple solids in batches. brepnet_collate_fn()
does the job of combining multiple solids into batches and keeps track
of how the faces, edges and coedges of each solid were combined into the
tensors for the batch. This information is contained in the split_batch
array.
For each solid in the batch, split_batch contains the indices in the batch
which must be accessed to understand the logits for faces, edges and coedges
split_batch = [
{
"face_indices": [ num_faces_for_solid_1],
"edge_indices": [ num_edges_for_solid_1],
"coedge_indices" [ num_coedges_for_solid_1]:
},
...
]
Given some logits for the batch they can be decoded like this
face_logits_for_solid_1 = face_logits_for_batch[split_batch[1]["face_indices"]]
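Example usage
An illustrative sketch only. It assumes an opts object providing the
fields used in __init__ (kernel, input_features, dataset_file,
dataset_dir, label_dir), that the dataset file contains the key
"training_set", and that `model` is some callable returning face logits
for a batch; none of these are defined in this module.
    from torch.utils.data import DataLoader
    dataset = BRepNetDataset(opts, "training_set")
    loader = DataLoader(dataset, batch_size=8, collate_fn=brepnet_collate_fn)
    for batch in loader:
        face_logits = model(batch)   # hypothetical model call
        for split in batch["split_batch"]:
            logits_for_solid = face_logits[split["face_indices"]]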
"""
import torch
from torch.utils.data import Dataset
from pathlib import Path
import math
import numpy as np
import hashlib
import copy
import pickle
import utils.data_utils as data_utils
class BRepNetDataset(Dataset):
"""
Dataset which loads the processed step data generated by
pipeline/extract_brepnet_data_from_step.py
"""
def __init__(self, opts, train_val_or_test):
super(BRepNetDataset, self).__init__()
self.opts = opts
        # Load the topological walks to be used in the kernel
self.kernel = data_utils.load_json_data(self.opts.kernel)
# Load the list of input features to be used
self.feature_lists = data_utils.load_json_data(self.opts.input_features)
# The dataset file holds the full list of batches and the
# feature normalization information
dataset_info = data_utils.load_json_data(self.opts.dataset_file)
self.bodies = dataset_info[train_val_or_test]
self.feature_standardization = dataset_info["feature_standardization"]
self.dataset_dir = Path(self.opts.dataset_dir)
self.label_dir = self.find_label_dir(opts, train_val_or_test)
self.cache_dir = self.create_cache_dir(self.dataset_dir)
def __len__(self):
"""
Get the number of bodies in the dataset
"""
return len(self.bodies)
def __getitem__(self, idx):
"""
        Load the data for a body. Use the cache if it's there.
Cache the binary data if not.
"""
assert idx < len(self.bodies)
cache_pathname = self.get_cache_pathname(idx)
if cache_pathname.exists():
body_data = self.load_body_from_cache(cache_pathname)
else:
body_data = self.load_and_cache_body(idx, cache_pathname)
return body_data
def hash_strings_in_list(self, string_list):
"""
Create a hash of the strings in the list
"""
        single_str = "".join(string_list)
single_str = single_str.encode('ascii', 'replace')
return hashlib.sha224(single_str).hexdigest()
def hash_data_for_body(self, body_filestem):
"""
We want to be sure that the cache is correctly built given
all the hyper-parameters of the network.
Here we make a hash of these hyper-parameters and
use this as the pathname for the cache
"""
string_list = [ body_filestem ]
for ent in self.kernel:
for walk in self.kernel[ent]:
string_list.append(walk)
for ent in self.feature_lists.values():
feature_list = list(ent)
sorted_feature_list = sorted(feature_list)
for feature in sorted_feature_list:
string_list.append(feature)
if self.label_dir is not None:
string_list.append("with_labels")
return self.hash_strings_in_list(string_list)
def get_cache_pathname(self, idx):
"""
        Create a pathname for the cache file for the body with index idx
"""
body_filestem = self.bodies[idx]
hstring = self.hash_data_for_body(body_filestem)
return (self.cache_dir/hstring).with_suffix(".p")
def cache_body(self, cache_pathname, data):
"""
Save the cache data for the body
"""
torch.save(data, cache_pathname)
def load_body_from_cache(self, cache_pathname):
"""
Load cache data for the body with the given pathname
"""
return torch.load(cache_pathname)
def load_and_cache_body(self, idx, cache_pathname):
"""
        Load the body with idx from the processed step data
        (npz file), then save a cache of the binary tensors
"""
body_data = self.load_body(idx)
self.cache_body(cache_pathname, body_data)
return body_data
def create_cache_dir(self, dataset_dir):
cache_dir = dataset_dir / "cache"
if not cache_dir.exists():
cache_dir.mkdir()
assert cache_dir.exists(), "Check we were able to create the cache dir"
return cache_dir
def find_label_dir(self, opts, train_val_or_test):
"""
        Try to locate the dir where the labels are stored if this is not
        given explicitly
"""
if opts.label_dir:
return Path(opts.label_dir)
assert len(self.bodies) > 0
first_file_stem = self.bodies[0]
dataset_dir = Path(opts.dataset_dir)
# Look for the seg files in the same folder as the
# step files. This assumes the intermediate files
# were created in a subfolder of the dataset folder
# as is done by the quickstart script
        candidate_label_file = dataset_dir.parent / (first_file_stem + ".seg")
        if candidate_label_file.exists():
            print(f"Using labels from {dataset_dir.parent}")
return dataset_dir.parent
        # When using the Fusion Gallery segmentation dataset from the
        # quickstart script, the seg files are in the path
        # s2.0.0/breps/seg and the dataset folder will be
        # s2.0.0/processed
seg_dir = dataset_dir.parent / "breps/seg"
        candidate_label_file = seg_dir / (first_file_stem + ".seg")
        if candidate_label_file.exists():
print(f"Using labels from {seg_dir}")
return seg_dir
        # When using the model to evaluate on data without labels
        # the test set is used. Hence if we are building this
        # dataloader for the test set then we allow the labels to be
        # None without asserting
if train_val_or_test == "test_set":
print(" ")
print("Warning!! - No labels are provided. This can happen when you")
print("are evaluating with a pre-trained model on an unlabelled dataset.")
print("Please disregard and accuracy and IoU values which get logged.")
print(" ")
return None
print(" ")
print("Error! Failed to find any label files.")
print("These files should have the extension .seg and contain the")
print("segment index for each face in each brep")
print(" ")
print("You can use the --label_dir option to point the model at the appropriate folder")
print(" ")
assert False, "Failed to locate labels (seg files)"
def load_body(self, idx):
"""
Load the data for a body.
"""
assert idx < len(self.bodies)
file_stem = self.bodies[idx]
npz_pathname = self.dataset_dir / (file_stem + ".npz")
body_data = data_utils.load_npz_data(npz_pathname)
Xf, Xe, Xc = self.build_input_feature_tensors(body_data)
Kf, Ke, Kc = self.build_kernel_tensors(body_data)
# Gf is the face point grids tensor in the order
# the faces appear in the solid
#
# Gc is the coedge point grids in the order the
# coedges appear in the solid. The points
# in the grid are ordered based on the topological
# direction of the coedge
#
# lcs is the local coordinate systems for each coedge
Gf, Gc, lcs = self.build_point_grids(body_data)
        # We need to rearrange the order of the faces so that
        # faces with more than max_coedges_per_face coedges are at the
        # end of the array
max_coedges_per_face = 30
Ce = self.build_coedges_of_edges_tensor(body_data)
# Try building point grids from the left coedges
# of each edge
Ge = self.build_edge_grids_from_left_coedges(Gc, Ce, body_data)
Cf, Csf, new_to_old_face_indices = self.build_coedges_of_faces_tensor(
body_data,
max_coedges_per_face
)
old_to_new_face_indices = self.find_inverse_permutation(new_to_old_face_indices)
Kf_perm = old_to_new_face_indices[Kf]
Xf_perm = Xf[new_to_old_face_indices]
Gf_perm = Gf[new_to_old_face_indices]
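        # Note on the two index arrays above (a small hypothetical example):
        # Kf stores face *indices*, so each entry is mapped through
        # old_to_new_face_indices, while Xf and Gf store per-face *rows*,
        # which are gathered with new_to_old_face_indices.
        #   new_to_old_face_indices = [2, 0, 1]  =>  old_to_new = [1, 2, 0]
        #   Xf_perm[0] = Xf[2]        (old face 2 moves to slot 0)
        #   an entry 2 in Kf becomes old_to_new[2] = 0, pointing at that slot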
# If we are evaluating a pre-trained model on a dataset
        # with no labels then the label_dir will be None. In this
# case we provide some "dummy" label values here.
if self.label_dir is not None:
labels = self.load_labels(file_stem)
labels_perm = labels[new_to_old_face_indices]
else:
labels_perm = torch.zeros(Xf_perm.size(0), dtype=torch.int64)
data = {
"face_features": Xf_perm,
"face_point_grids": Gf_perm,
"edge_features": Xe,
"edge_point_grids": Ge,
"coedge_features": Xc,
"coedge_point_grids": Gc,
"coedge_lcs": lcs,
"face_kernel_tensor": Kf_perm,
"edge_kernel_tensor": Ke,
"coedge_kernel_tensor": Kc,
"coedges_of_edges": Ce,
"coedges_of_small_faces": Cf,
"coedges_of_big_faces": Csf,
"labels": labels_perm,
"old_to_new_face_indices": old_to_new_face_indices,
"file_stem": file_stem
}
return data
def build_kernel_tensors(self, body_data):
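        """
        Build the face, edge and coedge kernel index tensors Kf, Ke and Kc
        from the coedge adjacency arrays stored for the body
        """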
n = body_data["coedge_to_next"]
m = body_data["coedge_to_mate"]
e = body_data["coedge_to_edge"]
f = body_data["coedge_to_face"]
        # The permutation arrays n, m, e and f have the following meanings
# For a coedge with index c:
# The next coedge around the loop is n[c]
# The mating coedge is m[c]
# The index of the parent edge is e[c]
# The index of the parent face is f[c]
# All these arrays should have the same size. i.e. the number of coedges
# in the body
num_coedges = n.size
assert num_coedges == m.size
assert num_coedges == e.size
assert num_coedges == f.size
# Now we need to recover the permutation p with the meaning
        # p[c] is the index of the previous coedge in the loop to the
# coedge with index c. This is a little like finding the
# inverse (also the transpose) of a permutation matrix.
p = self.find_inverse_permutation(n)
# Now we can build the kernel tensors for the faces, edges and coedges
Kf = self.build_kernel_tensor_from_topology(n, p, m, e, f, self.kernel["faces"])
Ke = self.build_kernel_tensor_from_topology(n, p, m, e, f, self.kernel["edges"])
Kc = self.build_kernel_tensor_from_topology(n, p, m, e, f, self.kernel["coedges"])
return Kf, Ke, Kc
def build_input_feature_tensors(self, body_data):
"""
Convert the feature tensors for faces, edges and coedges
from numpy to pytorch
"""
Xf = torch.from_numpy(body_data["face_features"])
Xe = torch.from_numpy(body_data["edge_features"])
Xc = torch.from_numpy(body_data["coedge_features"])
Xf = self.standardize_features(
Xf,
self.feature_standardization["face_features"]
)
Xe = self.standardize_features(
Xe,
self.feature_standardization["edge_features"]
)
Xc = self.standardize_features(
Xc,
self.feature_standardization["coedge_features"]
)
return Xf, Xe, Xc
def standardize_features(self, feature_tensor, stats):
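        """
        Standardize each feature column using the mean and standard
        deviation from the dataset file, i.e.
        x_standardized = (x - mean) / standard_deviation
        """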
num_features = len(stats)
assert feature_tensor.size(1) == num_features
means = torch.zeros(num_features, dtype=feature_tensor.dtype)
sds = torch.zeros(num_features, dtype=feature_tensor.dtype)
eps = 1e-7
for index, s in enumerate(stats):
assert s["standard_deviation"] > eps, "Feature has zero standard deviation"
means[index] = s["mean"]
sds[index] = s["standard_deviation"]
        # means and sds have shape [ num_features ] and broadcast over the
        # entities dimension automatically in the subtraction and division below
feature_tensor_zero_mean = feature_tensor - means
        feature_tensor_standardized = feature_tensor_zero_mean / sds
# Test code to check this works
num_ents = feature_tensor.size(0)
test_tensor = torch.zeros((num_ents, num_features), dtype=feature_tensor.dtype)
for i in range(num_ents):
for j in range(num_features):
value = (feature_tensor[i,j] - means[j])/sds[j]
test_tensor[i,j] = value
        assert torch.allclose(feature_tensor_standardized, test_tensor, eps)
        # Convert the tensors to floats after standardization
        return feature_tensor_standardized.float()
def build_point_grids(self, body_data):
"""
Read the point grid and LCS data and convert it to
pytorch
"""
face_point_grids = torch.from_numpy(body_data["face_point_grids"])
coedge_point_grids = torch.from_numpy(body_data["coedge_point_grids"])
coedge_lcs = torch.from_numpy(body_data["coedge_lcs"])
return face_point_grids.float(), coedge_point_grids.float(), coedge_lcs.float()
def build_edge_grids_from_left_coedges(self, Gc, Ce, body_data):
"""
Try building point grids from the left coedges
of each edge
"""
        # Let's do this the slow way to start with
num_edges = Ce.size(0)
left_coedge_indices = torch.zeros(num_edges, dtype=torch.int64)
reverse_flags = body_data["coedge_reverse_flags"]
for edge_index in range(num_edges):
coedges = Ce[edge_index, :]
first_coedge_index = coedges[0]
second_coedge_index = coedges[1]
if reverse_flags[second_coedge_index]==1:
left_coedge_indices[edge_index] = first_coedge_index
else:
left_coedge_indices[edge_index] = second_coedge_index
return Gc[left_coedge_indices]
def build_kernel_tensor_from_topology(self, n, p, m, e, f, kernel):
"""
A BRepNet kernel is defined by a list of topological walks.
These are used to create the index tensors Kf, Ke and Kc
        for faces, edges and coedges separately.
        The index tensors Kf, Ke and Kc are essentially permutations
        on the arrays of coedges. The BRepNet paper describes these
        as permutation matrices, but in practice we don't need a full
        matrix.
        This function builds the permutations as follows.
- We start off with just a list of indices in order
[ 0, 1, 2, ... num_coedges-1]
- For each entity in the kernel we need to find its index.
This will be found by following a topological walk.
The list of instructions in the walk is defined in the kernel.
        To execute one instruction we simply use the current coedge
        indices to index into either the next, previous, mate,
        edge or face permutation. i.e.
        c = n[c]
        maps each index in c to the index of its next coedge.
- We do this for all the instructions in the walk.
"""
        # The permutation arrays n, p, m, e and f have the following meanings
# For a coedge with index c:
# The next coedge around the loop is n[c]
# The previous coedge around the loop is p[c]
# The mating coedge is m[c]
# The index of the parent edge is e[c]
# The index of the parent face is f[c]
# Each of the arrays n, p, m, e and f should have the same
# length. i.e. the number of coedges in the B-Rep
num_coedges = n.size
assert num_coedges == p.size
assert num_coedges == m.size
assert num_coedges == e.size
assert num_coedges == f.size
# We want to build an integer tensor of size
# [ num_coedges x num_entities_in_kernel ]
# We will build this one column at a time
kernel_tensor_cols = []
for walk_instructions in kernel:
# This is like starting with the identity matrix.
# The identity permutation is just the indices
# [0, 1, 2, ...]
c = np.arange(num_coedges, dtype=n.dtype)
# Loop over the instructions and execute each one
for instruction in walk_instructions:
if instruction == "n":
c = n[c]
elif instruction == "p":
c = p[c]
elif instruction == "m":
c = m[c]
elif instruction == "f":
c = f[c]
elif instruction == "e":
c = e[c]
else:
assert False, "Unknown instruction"
kernel_tensor_col = torch.from_numpy(c.astype(dtype=np.int64))
kernel_tensor_cols.append(kernel_tensor_col)
kernel_tensor = torch.transpose(torch.stack(kernel_tensor_cols), 0, 1)
assert kernel_tensor.size(0) == num_coedges
assert kernel_tensor.size(1) == len(kernel)
return kernel_tensor
def find_face_permutation(self, body_data, max_coedges_per_face):
"""
We will need to rearrange the array of faces so that
faces with more than max_coedges_per_face coedges
appear at the end of the faces array
"""
num_faces = body_data["face_features"].shape[0]
coedge_to_face = body_data["coedge_to_face"]
coedges_per_face = np.bincount(coedge_to_face)
small_face_indices = np.where(coedges_per_face <= max_coedges_per_face)[0]
big_face_indices = np.where(coedges_per_face > max_coedges_per_face)[0]
if small_face_indices.size == 0:
face_permutation = big_face_indices
elif big_face_indices.size == 0:
face_permutation = small_face_indices
else:
face_permutation = np.concatenate((small_face_indices, big_face_indices))
return face_permutation, small_face_indices.size
def build_coedges_of_edges_tensor(self, body_data):
"""
We want to build an index tensor Ce with
Ce.size() = [ num_edges x 2]
The ith row of Ce contains the indices of the two
coedges belonging to the ith parent edge
"""
coedge_to_edge = body_data["coedge_to_edge"]
num_edges = body_data["edge_features"].shape[0]
coedges_of_edges = [ [] for i in range(num_edges)]
for coedge_index, edge_index in enumerate(coedge_to_edge):
coedges_of_edges[edge_index].append(coedge_index)
for coedges in coedges_of_edges:
assert len(coedges) == 1 or len(coedges) == 2
if len(coedges) == 1:
                # OK. This is the special case of a sphere. Here
                # we have two coedges at the poles. They link to themselves.
                # As they are only used once each they don't have a second
                # coedge in the coedges_of_edges array. We add the same
                # coedge twice here
coedges.append(coedges[0])
coedges_of_edges = torch.tensor(coedges_of_edges, dtype=torch.int64)
return coedges_of_edges
def build_coedges_of_faces_tensor(
self,
body_data,
max_coedges_per_face
):
"""
        For faces with at most max_coedges_per_face coedges we
        want to build a tensor
        Cf.size() = [ num_small_faces x max_coedges_per_face ]
        The tensor is padded with the index num_coedges. A row of
        zeros will be concatenated to the MLP output tensor to allow
        this padding to work.
        For faces with more than max_coedges_per_face coedges we have
an array of tensors of different sizes
Csf = [
Csf.size() = [ num_coedges_in_face_1 ],
...
]
"""
coedge_to_face = body_data["coedge_to_face"]
num_faces = body_data["face_features"].shape[0]
num_coedges = coedge_to_face.size
face_to_coedges = {}
for coedge_index, face_index in enumerate(coedge_to_face):
            if face_index not in face_to_coedges:
face_to_coedges[face_index] = []
face_to_coedges[face_index].append(coedge_index)
small_face_indices = []
big_face_indices = []
Cf = []
Csf = []
for face_index in range(num_faces):
assert face_index in face_to_coedges
assert len(face_to_coedges[face_index]) > 0
if len(face_to_coedges[face_index]) > max_coedges_per_face:
# This is the case where we have faces with lots of
# coedges around them. We place these long lists of
# indices into an index tensor for each face
Csf.append(torch.tensor(face_to_coedges[face_index], dtype=torch.int64))
big_face_indices.append(face_index)
else:
# This is the case where we have only a small number
# of coedges around a face. We want to build an
# index tensor
#
# Cf.size() = [ num_small_faces x max_coedges ]
#
# with the rows padded with the value 'num_coedges'
coedges_of_face = face_to_coedges[face_index]
# We need to pad the array with the value 'num_coedges'
padding_size = max_coedges_per_face - len(coedges_of_face)
coedges_of_face.extend([num_coedges] * padding_size)
# Append the row to the list. We will stack this once it
# has been accumulated
Cf.append(torch.tensor(coedges_of_face, dtype=torch.int64))
small_face_indices.append(face_index)
# Stack the rows for the "small faces"
Cf = torch.stack(Cf)
        # Finally we need to define a "permutation" index tensor. We want to
        # re-arrange the face features Xf and the face indices in Kf so that
        # the big faces come at the end of the tensors
small_face_indices = torch.tensor(small_face_indices, dtype=torch.int64)
big_face_indices = torch.tensor(big_face_indices, dtype=torch.int64)
face_permutation = torch.cat([small_face_indices, big_face_indices])
return Cf, Csf, face_permutation
def load_labels(self, file_stem):
"""
Load the segmentation from the seg file
"""
label_pathname = self.label_dir / (file_stem + ".seg")
face_labels = np.loadtxt(label_pathname, dtype=np.int64)
face_labels_tensor = torch.from_numpy(face_labels)
if face_labels_tensor.ndim == 0:
face_labels_tensor = torch.unsqueeze(face_labels_tensor, 0)
return face_labels_tensor
def find_inverse_permutation(self, perm):
assert perm.ndim == 1
# We create the identity permutation. i.e. [0, 1, 2, ...]
if isinstance(perm, np.ndarray):
identity = np.arange(perm.size, dtype=perm.dtype)
inv_perm = np.zeros(perm.size, dtype=perm.dtype)
else:
assert isinstance(perm, torch.Tensor)
identity = torch.arange(end=perm.size(0), dtype=perm.dtype)
inv_perm = torch.zeros(perm.size(0), dtype=perm.dtype)
# Now we know that inv_perm[perm] is the identity.
# We can "assign" values of inv_perm[perm] from the
# identity array
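        # Small hypothetical example: perm = [2, 0, 1]
        #   inv_perm[2] = 0, inv_perm[0] = 1, inv_perm[1] = 2
        #   so inv_perm = [1, 2, 0]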
inv_perm[perm] = identity
return inv_perm
def unsqueeze_single_dim_tensors(tensor_list):
unsqueezed = []
for t in tensor_list:
if t.ndim == 1:
unsqueezed.append(torch.unsqueeze(t, 0))
else:
unsqueezed.append(t)
return unsqueezed
def concatenate_tensor_arrays(small, big, unsqueeze):
    all_tensors = []
    if unsqueeze:
        small = unsqueeze_single_dim_tensors(small)
        big = unsqueeze_single_dim_tensors(big)
    all_tensors.extend(small)
    all_tensors.extend(big)
    return torch.cat(all_tensors)
def add_offset_to_face_index(
face_indices,
num_small_faces,
small_face_offset,
big_face_offset
):
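    """
    Map face indices from a single solid into their positions in the batch.
    Small faces get small_face_offset added; big faces are shifted so that
    the first big face (local index num_small_faces) lands at big_face_offset.
    Illustrative example (hypothetical values): with num_small_faces = 3,
    small_face_offset = 10 and big_face_offset = 20, a small face index 1
    maps to 11 and a big face index 3 (the first big face) maps to 20.
    """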
face_index_offset = small_face_offset*(face_indices < num_small_faces)
# The array was re-ordered so that small faces are always before
# big faces.
    # The first big face has index "num_small_faces" so we need to map this to
# big_face_offset
face_index_offset += (big_face_offset-num_small_faces)*(face_indices >= num_small_faces)
return face_indices + face_index_offset
def add_offset_to_coedge_index_with_padding(
padded_coedge_indices,
pad_value,
coedge_index_offset,
new_pad_value
):
"""
In the coedges_of_small_faces array we have the indices
of coedges with padding at the end of the array. The
value of the padding in the input tensor is pad_value.
We want to change the values of the valid coedge indices
    and the padding separately. The valid coedge indices should be
    increased by `coedge_index_offset` and the padding should be
    replaced by `new_pad_value`
"""
coedge_index_offset = coedge_index_offset*(padded_coedge_indices != pad_value)
coedge_index_offset += (new_pad_value-pad_value)*(padded_coedge_indices == pad_value)
return padded_coedge_indices + coedge_index_offset
def brepnet_collate_fn(data_list):
"""
Collate the data from multiple bodies into a single
set of tensors.
    Here I call the faces with a small number of coedges
    "small faces" and the faces with a large number of
    coedges "big faces".
"""
Xf_small_faces = []
Xf_big_faces = []
Gf_small_faces = []
Gf_big_faces = []
labels_small_faces = []
labels_big_faces = []
Xe = []
Ge = []
Xc = []
Gc = []
lcs = []
Ke = [] # Edge indices for each coedge
Kc = [] # Coedge indices for each coedge
Ce = [] # Coedge indices for coedges owned by each edge
Csf = [] # Coedge indices for coedges owned by "big faces"
# Keep track of which file each B-Rep came from
file_stems = []
# Keep track of the data we need to split the batch
split_batch = []
    # We need to make two passes through the data.
    # In the first pass we process everything which depends only on the
    # edge and coedge offsets and we slice the face data into small and
    # big faces. In the second pass we fix up the face indices, which
    # depend on the total number of small faces in the batch, along with
    # the padded coedges_of_small_faces tensor
face_offset = 0
edge_offset = 0
coedge_offset = 0
for data in data_list:
        # These are input features. We just want to concatenate
        # the tensors for each B-Rep
num_edges = data["edge_features"].shape[0]
Xe.append(data["edge_features"])
Ge.append(data["edge_point_grids"])
num_coedges = data["coedge_features"].shape[0]
Xc.append(data["coedge_features"])
Gc.append(data["coedge_point_grids"])
lcs.append(data["coedge_lcs"])
# For edge and coedge indices things are easy. We just need to
# add the offsets to the arrays
Ke.append(data["edge_kernel_tensor"] + edge_offset)
Ce.append(data["coedges_of_edges"] + coedge_offset)
Kc.append(data["coedge_kernel_tensor"] + coedge_offset)
for single_face_coedges in data["coedges_of_big_faces"]:
Csf.append(single_face_coedges + coedge_offset)
# Face features are a little more complicated. We want
# to keep the faces with a small number of coedges
# at the start of the array and the faces with a large
# number of coedges in another array which will get
# appended to the end.
num_small_faces = data["coedges_of_small_faces"].shape[0]
num_big_faces = len(data["coedges_of_big_faces"])
Xf = data["face_features"]
Gf = data["face_point_grids"]
assert num_small_faces + num_big_faces == Xf.shape[0]
assert num_small_faces + num_big_faces == Gf.shape[0]
# We need to slice the face feature tensor
Xf_small_faces.append(Xf[:num_small_faces])
Xf_big_faces.extend(Xf[num_small_faces:])
Gf_small_faces.append(Gf[:num_small_faces])
Gf_big_faces.append(Gf[num_small_faces:])
labels = data["labels"]
labels_small_faces.append(labels[:num_small_faces])
labels_big_faces.append(labels[num_small_faces:])
file_stems.append(data["file_stem"])
split_batch_data_for_brep = {
"edge_indices": torch.arange(edge_offset, edge_offset+num_edges, dtype=torch.int64),
"coedge_indices": torch.arange(coedge_offset, coedge_offset+num_coedges, dtype=torch.int64)
}
split_batch.append(split_batch_data_for_brep)
face_offset += num_small_faces
edge_offset += num_edges
coedge_offset += num_coedges
    # This is the second pass through the data. We need to
    # set the indices of faces in Kf and the per-solid "face_indices"
    # entries in split_batch for each B-Rep
Kf = []
    # The coedge indices for coedges owned by "small faces" also need to
    # be processed here as we need to apply different offsets to the
    # valid coedge indices and the padding
new_padding_value = coedge_offset
coedge_offset = 0
Cf = []
small_face_offset = 0
for solid_index, data in enumerate(data_list):
num_small_faces = data["coedges_of_small_faces"].shape[0]
num_big_faces = len(data["coedges_of_big_faces"])
old_to_new_face_index = data["old_to_new_face_indices"]
num_coedges = data["coedge_features"].shape[0]
# We need to add on the offsets for the new face indices
# Here this is done for the indices which allow us to get
# back from the combined batch to the original face indices
# in each B-Rep
offset_old_to_new_face_index = add_offset_to_face_index(
old_to_new_face_index,
num_small_faces,
small_face_offset,
face_offset
)
offset_coedges_of_small_faces = add_offset_to_coedge_index_with_padding(
data["coedges_of_small_faces"],
num_coedges,
coedge_offset,
new_padding_value
)
Cf.append(offset_coedges_of_small_faces)
split_batch[solid_index]["face_indices"] = offset_old_to_new_face_index
# Here we add on the offsets for the kernel Kf
brep_Kf = data["face_kernel_tensor"]
offset_Kf = add_offset_to_face_index(
brep_Kf,
num_small_faces,
small_face_offset,
face_offset
)
Kf.append(offset_Kf)
small_face_offset += num_small_faces
face_offset += num_big_faces
coedge_offset += num_coedges
batch_data = {
"face_features": concatenate_tensor_arrays(Xf_small_faces, Xf_big_faces, unsqueeze=True),
"face_point_grids": concatenate_tensor_arrays(Gf_small_faces, Gf_big_faces, unsqueeze=True),
"edge_features": torch.cat(Xe),
"edge_point_grids": torch.cat(Ge),
"coedge_features": torch.cat(Xc),
"coedge_point_grids": torch.cat(Gc),
"coedge_lcs": torch.cat(lcs),
"face_kernel_tensor": torch.cat(Kf),
"edge_kernel_tensor": torch.cat(Ke),
"coedge_kernel_tensor": torch.cat(Kc),
"coedges_of_edges": torch.cat(Ce),
"coedges_of_small_faces": torch.cat(Cf),
"coedges_of_big_faces": Csf,
"labels": concatenate_tensor_arrays(labels_small_faces, labels_big_faces, unsqueeze=False),
"split_batch": split_batch,
"file_stems": file_stems
}
return batch_data