Skip to content

Commit

Permalink
Bug fixes for 0.3.3 (#61)
Browse files Browse the repository at this point in the history
- Fixed an edge case in the NLPAR chunking of scans that could lead to a crash.
- Fixed an issue where PyEBSDIndex would not use all GPUs by default.
- ``IPFColor.makeipf()`` now automatically reads the number of columns in the scan from the file defined in the indexer object when ``xsize`` is not given; the number of rows is derived from the number of points unless ``ysize`` is given.
Signed-off-by: David Rowenhorst <[email protected]>
  • Loading branch information
drowenhorst-nrl authored Jun 7, 2024
1 parent 5472e8a commit 5d84da3
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 464 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ Changelog
All notable changes to PyEBSDIndex will be documented in this file. The format is based
on `Keep a Changelog <https://keepachangelog.com/en/1.1.0>`_.

0.3.3 (2024-06-07)
==================

Fixed
-----
- Fixed an edge case in the NLPAR chunking of scans that could lead to a crash.
- Fixed an issue where PyEBSDIndex would not use all GPUs by default.
- ``IPFColor.makeipf()`` now automatically reads the number of columns in the scan from the file defined in the indexer object when ``xsize`` is not given; the number of rows is derived from the number of points unless ``ysize`` is given.



0.3.2 (2024-05-31)
==================

Expand Down
474 changes: 62 additions & 412 deletions doc/tutorials/ebsd_index_demo.ipynb

Large diffs are not rendered by default.

25 changes: 17 additions & 8 deletions pyebsdindex/EBSDImage/IPFcolor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,26 @@ def makeipf(ebsddata, indexer, vector=np.array([0,0,1.0]), xsize = None, ysize =

if xsize is not None:
xsize = int(xsize)
if ysize is None:
ysize = int(npoints // xsize + np.int64((npoints % xsize) > 0))
#if ysize is None:
#print(ysize)
else:
xsize = int(npoints)
ysize = 1
xsize = indexer.fID.nCols
#xsize = int(npoints)
#ysize = 1

npts = int(npoints)
if int(xsize*ysize) < npoints:
npts = int(xsize*ysize)
ipf_out = ipfout[0:npts,:].reshape(ysize, xsize,3)
if ysize is not None:
ysize = int(ysize)
else:
ysize = int(npoints // xsize + np.int64((npoints % xsize) > 0))


ipf_out = np.zeros((ysize, xsize,3), dtype=np.float32)
ipf_out = ipf_out.flatten()
npts = min(int(npoints), int(xsize*ysize))
# if int(xsize*ysize) < npoints:
# npts = int(xsize*ysize)
ipf_out[0:npts*3] = ipfout[0:npts,:].flatten()
ipf_out = ipf_out.reshape(ysize, xsize, 3)
return ipf_out


Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
]
__description__ = "Python based tool for Radon based EBSD indexing"
__name__ = "pyebsdindex"
__version__ = "0.3.2"
__version__ = "0.3.3"


# Try to import only once - also will perform check that at least one GPU is found.
Expand Down
1 change: 1 addition & 0 deletions pyebsdindex/_ebsd_index_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ def index_pats_distributed(
else:
if ngpu is None:
ngpu = len(clparam.gpu)
gpu_id = np.arange(ngpu, dtype=int)
cudagpuvis = ''
for cdgpu in range(len(clparam.gpu)):
cudagpuvis += str(cdgpu)+','
Expand Down
77 changes: 39 additions & 38 deletions pyebsdindex/nlpar_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def calcnlpar(self, chunksize=0, searchradius=None, lam = None, dthresh = None,
rescale = False

nthreadpos = numba.get_num_threads()
#numba.set_num_threads(36)
#numba.set_num_threads(18)
colstartcount = np.asarray([0,ncols],dtype=np.int64)
if verbose >= 1:
print("lambda:", self.lam, "search radius:", self.searchradius, "dthresh:", self.dthresh)
Expand Down Expand Up @@ -756,50 +756,51 @@ def _calcchunks(self, patdim, ncol, nrow, target_bytes=2e9, col_overlap=0, row_o
rowstepov = min(rowstep + 2 * row_overlap, nrow)

# colchunks = np.round(np.arange(ncolchunks+1)*ncol/ncolchunks).astype(int)
colchunks = np.zeros((ncolchunks, 2), dtype=int)
colchunks[:, 0] = (np.arange(ncolchunks) * colstep).astype(int)
colchunks[:, 1] = colchunks[:, 0] + colstepov - int(col_overlap)
colchunks[:, 0] -= col_overlap
colchunks[0, 0] = 0;

for i in range(ncolchunks - 1):
if colchunks[i + 1, 0] >= ncol:
colchunks = colchunks[0:i + 1, :]

ncolchunks = colchunks.shape[0]
# colchunks = np.zeros((ncolchunks, 2), dtype=int)
# colchunks[:, 0] = (np.arange(ncolchunks) * colstep).astype(int)
# colchunks[:, 1] = colchunks[:, 0] + colstepov - int(col_overlap)
# colchunks[:, 0] -= col_overlap
# colchunks[0, 0] = 0;

colchunks = []
col_overlap = int(col_overlap)
for c in range(ncolchunks):
cchunk = [int(c * colstep) - col_overlap, int(c * colstep + colstepov) - col_overlap]
colchunks.append(cchunk)
if cchunk[1] > ncol:
break

ncolchunks = len(colchunks)
colchunks = np.array(colchunks, dtype=int)
colchunks[0, 0] = 0
colchunks[-1, 1] = ncol

if ncolchunks > 1:
colchunks[-1, 0] = max(0, colchunks[-2, 1] - col_overlap)

colchunks += col_offset

# colproc = np.zeros((ncolchunks, 2), dtype=int)
# if ncolchunks > 1:
# colproc[1:, 0] = col_overlap
# if ncolchunks > 1:
# colproc[0:, 1] = colchunks[:, 1] - colchunks[:, 0] - col_overlap
# colproc[-1, 1] = colchunks[-1, 1] - colchunks[-1, 0]

# rowchunks = np.round(np.arange(nrowchunks + 1) * nrow / nrowchunks).astype(int)
rowchunks = np.zeros((nrowchunks, 2), dtype=int)
rowchunks[:, 0] = (np.arange(nrowchunks) * rowstep).astype(int)
rowchunks[:, 1] = rowchunks[:, 0] + rowstepov - int(row_overlap)
rowchunks[:, 0] -= row_overlap
rowchunks[0, 0] = 0;

for i in range(nrowchunks - 1):
if rowchunks[i + 1, 0] >= nrow:
rowchunks = rowchunks[0:i + 1, :]

nrowchunks = rowchunks.shape[0]
# for i in range(ncolchunks - 1):
# if colchunks[i + 1, 0] >= ncol:
# colchunks = colchunks[0:i + 1, :]

rowchunks = []
row_overlap = int(row_overlap)
for r in range(nrowchunks):
rchunk = [int(r * rowstep) - row_overlap, int(r * rowstep + rowstepov) - row_overlap]
rowchunks.append(rchunk)
if rchunk[1] > nrow:
break

nrowchunks = len(rowchunks)
rowchunks = np.array(rowchunks, dtype=int)
rowchunks[0, 0] = 0
rowchunks[-1, 1] = nrow

rowchunks += row_offset
if nrowchunks > 1:
rowchunks[-1, 0] = max(0, rowchunks[-2, 1] - row_overlap)

# rowproc = np.zeros((nrowchunks, 2), dtype=int)
# if nrowchunks > 1:
# rowproc[1:, 0] = row_overlap
# if nrowchunks > 1:
# rowproc[0:, 1] = rowchunks[:, 1] - rowchunks[:, 0] - row_overlap
# rowproc[-1, 1] = rowchunks[-1, 1] - rowchunks[-1, 0]
rowchunks += row_offset

return ncolchunks, nrowchunks, colchunks, rowchunks

Expand Down
11 changes: 7 additions & 4 deletions pyebsdindex/opencl/nlpar_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def loptfunc(lam, d2, tw, dthresh):
lamopt_values = []

sigma, d2, n2 = self.calcsigma(nn=1, saturation_protect=saturation_protect, automask=automask, normalize_d=True,
return_nndist=True)
return_nndist=True, **kwargs)

#sigmapad = np.pad(sigma, 1, mode='reflect')
#d2normcl(d2, n2, sigmapad)
Expand All @@ -133,7 +133,7 @@ def loptfunc(lam, d2, tw, dthresh):
return lamopt_values.flatten()


def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=False, gpu_id = None, **kwargs):
def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=False, gpu_id = None, verbose = 2, **kwargs):
self.sigmann = nn
if self.sigmann > 7:
print("Sigma optimization search limited to a search radius <= 7")
Expand Down Expand Up @@ -222,7 +222,8 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa
#count_local = cl.LocalMemory(nnn*npadmx*4)
count_local = cl.Buffer(ctx, mf.READ_WRITE, size=int(mxchunk * nnn * 4))
countchunk = np.zeros((mxchunk, nnn), dtype=np.float32)

ndone = 0
nchunks = int(chunks[1] * chunks[0])
for rowchunk in range(chunks[1]):
rstart = chunks[3][rowchunk, 0]
rend = chunks[3][rowchunk, 1]
Expand Down Expand Up @@ -289,7 +290,9 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa
countnn[rstart:rend, cstart:cend] = countchunk[0:int(ncolchunk*nrowchunk), :].reshape(nrowchunk, ncolchunk, nnn)
dist[rstart:rend, cstart:cend] = distchunk[0:int(ncolchunk*nrowchunk), :].reshape(nrowchunk, ncolchunk, nnn)
sigma[rstart:rend, cstart:cend] = np.minimum(sigma[rstart:rend, cstart:cend], sigmachunk)

if verbose >= 2:
print("tiles complete: ", ndone, "/", nchunks, sep='', end='\r')
ndone +=1
dist_local.release()
count_local.release()
datapad_gpu.release()
Expand Down
2 changes: 1 addition & 1 deletion pyebsdindex/opencl/nlpar_clray.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
rescale = rescale,
gpu_id= gpu_id)

target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//3
target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//6
max_mem = clparams.gpu[gpu_id].global_mem_size*0.4
if target_mem*ngpuwrker > max_mem:
target_mem = max_mem/ngpuwrker
Expand Down

0 comments on commit 5d84da3

Please sign in to comment.