-
Notifications
You must be signed in to change notification settings - Fork 1
/
compute_tilings.c
156 lines (132 loc) · 4.14 KB
/
compute_tilings.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include <errno.h>
#include <omp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define PRECISION 70
#define PARALLEL_DEPTH 4
typedef struct PowerSeries { uint32_t coeffs[PRECISION]; } PowerSeries;
size_t CountBinDigits(size_t n) {
size_t result = 0;
while (n != 0) {
result += (n % 2);
n = n >> 1;
}
return result;
}
void Reverse(PowerSeries *data, size_t length) {
PowerSeries tmp;
for (size_t i = 0; i < length / 2; ++i) {
tmp = data[i];
data[i] = data[length - i - 1];
data[length - i - 1] = tmp;
}
}
uint32_t AddMod(uint32_t a, uint32_t b, uint32_t prime) {
size_t sum = (size_t)(a) + (size_t)(b);
return (uint32_t)( sum % prime );
}
void UpdateEntryAtPosition(PowerSeries *data, size_t ind, size_t bit, uint32_t prime) {
size_t nbit = bit >> 1;
if ( (bit & ind) != 0 ) {
for (size_t j = 0; j < PRECISION; ++j) {
data[bit ^ ind].coeffs[j] = AddMod( data[ind].coeffs[j], data[bit ^ ind].coeffs[j], prime );
}
if ( (nbit & ind) != 0 ) {
for (size_t j = 1; j < PRECISION; ++j) {
data[ind ^ bit ^ nbit].coeffs[j] = AddMod(data[ind].coeffs[j - 1], data[ind ^ bit ^ nbit].coeffs[j], prime);
}
}
}
}
void MultiplyByTransferMatrixAtPosition(PowerSeries *data, size_t height, size_t pos, uint32_t prime) {
size_t bit = (1ull) << (height - 1 - pos);
size_t nbit = bit >> 1;
size_t length = (1ull) << height;
if (height < 2 * PARALLEL_DEPTH + 3) {
for (size_t ind = 0; ind < length; ++ind) {
UpdateEntryAtPosition(data, ind, bit, prime);
}
} else {
size_t shard = 0;
size_t shards_number = (1 << PARALLEL_DEPTH);
size_t local_length = (length >> PARALLEL_DEPTH);
if (pos < PARALLEL_DEPTH) {
#pragma omp parallel for private(shard)
for (shard = 0; shard < shards_number; ++shard){
size_t tail = shard;
for (size_t i = 0; i < local_length; ++i) {
UpdateEntryAtPosition(data, tail + (i << PARALLEL_DEPTH), bit, prime);
}
}
} else {
#pragma omp parallel for private(shard)
for (shard = 0; shard < shards_number; ++shard){
size_t head = (size_t)(shard << (height - PARALLEL_DEPTH));
for (size_t i = 0; i < local_length; ++i) {
UpdateEntryAtPosition(data, i + head, bit, prime);
}
}
}
}
}
void MultiplyByTransferMatrix(PowerSeries *data, size_t height, uint32_t prime) {
size_t length = (1ull) << height;
Reverse(data, length);
for (size_t pos = 0; pos < height; ++pos) {
MultiplyByTransferMatrixAtPosition(data, height, pos, prime);
}
for (size_t i = 1; i < length; ++i) {
size_t bd = CountBinDigits(i);
for (size_t j = PRECISION - 1; j >= bd; --j) {
data[i].coeffs[j] = data[i].coeffs[j - bd];
}
for (size_t j = 0; j < bd; ++j) {
data[i].coeffs[j] = 0;
}
}
}
PowerSeries* CountTilings(size_t height, size_t min_width, size_t max_width, uint32_t prime) {
PowerSeries *result = (PowerSeries *)calloc(max_width - min_width + 1, sizeof(PowerSeries));
if (result == NULL) {
printf("Cannot calloc for the result\n");
return result;
}
size_t items_number = ((size_t)1) << height;
PowerSeries *data = (PowerSeries *)calloc( items_number, sizeof(PowerSeries) );
if (data == NULL) {
printf("Cannot calloc, %s \n", strerror(errno));
return result;
}
data[0].coeffs[0] = 1;
for (size_t i = 1; i <= max_width; ++i) {
MultiplyByTransferMatrix(data, height, prime);
if (i >= min_width) {
result[i - min_width] = data[0];
}
}
free(data);
return result;
}
void PrintPowerSeries(PowerSeries a) {
for (size_t i = 0; i < PRECISION; ++i) {
printf("%ju ", a.coeffs[i]);
}
}
int main(int argc, char *argv[]) {
size_t height = atoi(argv[1]);
size_t min_width = atoi(argv[2]);
size_t max_width = atoi(argv[3]);
size_t prime = atoi(argv[4]);
omp_set_num_threads(1 << PARALLEL_DEPTH);
omp_set_dynamic(0);
PowerSeries *result = CountTilings(height, min_width, max_width, prime);
for (size_t i = min_width; i <= max_width; ++i) {
PrintPowerSeries(result[i - min_width]);
printf(";");
}
free(result);
return 0;
}