Bug fixes for 0.3.3 (#61)

- Fixed edge case for NLPAR chunking of scans that would lead to a crash.
- Fixed issue where PyEBSDIndex would not use all GPUs by default.
- ``IPFColor.makeipf()`` will now automatically read the number of columns/rows in the scan from the file defined in the indexer object.

Signed-off-by: David Rowenhorst <[email protected]>
USNavalResearchLaboratory · Jun 7, 2024 · 5d84da3 · 5d84da3
1 parent 5472e8a
commit 5d84da3
Show file tree

Hide file tree

Showing 8 changed files with 139 additions and 464 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -5,6 +5,17 @@ Changelog
 All notable changes to PyEBSDIndex will be documented in this file. The format is based
 on `Keep a Changelog <https://keepachangelog.com/en/1.1.0>`_.
 
+0.3.3 (2024-06-07)
+==================
+
+Fixed
+-----
+- Fixed edge case for NLPAR chunking of scans that would lead to a crash.
+- Fixed issue where PyEBSDIndex would not use all GPUs by default.
+- ``IPFColor.makeipf()`` will now automatically read the number of columns/rows in the scan from the file defined in the indexer object.
+
+
+
 0.3.2 (2024-05-31)
 ==================
 

diff --git a/doc/tutorials/ebsd_index_demo.ipynb b/doc/tutorials/ebsd_index_demo.ipynb
diff --git a/pyebsdindex/EBSDImage/IPFcolor.py b/pyebsdindex/EBSDImage/IPFcolor.py
@@ -50,17 +50,26 @@ def makeipf(ebsddata, indexer, vector=np.array([0,0,1.0]), xsize = None, ysize =
 
   if xsize is not None:
     xsize = int(xsize)
-    if ysize is None:
-      ysize = int(npoints // xsize + np.int64((npoints % xsize) > 0))
+    #if ysize is None:
       #print(ysize)
   else:
-    xsize = int(npoints)
-    ysize = 1
+    xsize = indexer.fID.nCols
+    #xsize = int(npoints)
+    #ysize = 1
 
-  npts = int(npoints)
-  if int(xsize*ysize) < npoints:
-    npts = int(xsize*ysize)
-  ipf_out = ipfout[0:npts,:].reshape(ysize, xsize,3)
+  if ysize is not None:
+    ysize = int(ysize)
+  else:
+    ysize = int(npoints // xsize + np.int64((npoints % xsize) > 0))
+
+
+  ipf_out = np.zeros((ysize, xsize,3), dtype=np.float32)
+  ipf_out = ipf_out.flatten()
+  npts = min(int(npoints), int(xsize*ysize))
+  # if int(xsize*ysize) < npoints:
+  #   npts = int(xsize*ysize)
+  ipf_out[0:npts*3] = ipfout[0:npts,:].flatten()
+  ipf_out = ipf_out.reshape(ysize, xsize, 3)
   return ipf_out
 
 

diff --git a/pyebsdindex/__init__.py b/pyebsdindex/__init__.py
@@ -7,7 +7,7 @@
 ]
 __description__ = "Python based tool for Radon based EBSD indexing"
 __name__ = "pyebsdindex"
-__version__ = "0.3.2"
+__version__ = "0.3.3"
 
 
 # Try to import only once - also will perform check that at least one GPU is found.

diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py
@@ -294,6 +294,7 @@ def index_pats_distributed(
         else:
             if ngpu is None:
                 ngpu = len(clparam.gpu)
+                gpu_id = np.arange(ngpu, dtype=int)
             cudagpuvis = ''
             for cdgpu in range(len(clparam.gpu)):
                 cudagpuvis += str(cdgpu)+','

diff --git a/pyebsdindex/nlpar_cpu.py b/pyebsdindex/nlpar_cpu.py
@@ -389,7 +389,7 @@ def calcnlpar(self, chunksize=0, searchradius=None, lam = None, dthresh = None,
         rescale = False
 
     nthreadpos = numba.get_num_threads()
-    #numba.set_num_threads(36)
+    #numba.set_num_threads(18)
     colstartcount = np.asarray([0,ncols],dtype=np.int64)
     if verbose >= 1:
       print("lambda:", self.lam, "search radius:", self.searchradius, "dthresh:", self.dthresh)
@@ -756,50 +756,51 @@ def _calcchunks(self, patdim, ncol, nrow, target_bytes=2e9, col_overlap=0, row_o
     rowstepov = min(rowstep + 2 * row_overlap, nrow)
 
     # colchunks = np.round(np.arange(ncolchunks+1)*ncol/ncolchunks).astype(int)
-    colchunks = np.zeros((ncolchunks, 2), dtype=int)
-    colchunks[:, 0] = (np.arange(ncolchunks) * colstep).astype(int)
-    colchunks[:, 1] = colchunks[:, 0] + colstepov - int(col_overlap)
-    colchunks[:, 0] -= col_overlap
-    colchunks[0, 0] = 0;
-
-    for i in range(ncolchunks - 1):
-      if colchunks[i + 1, 0] >= ncol:
-        colchunks = colchunks[0:i + 1, :]
-
-    ncolchunks = colchunks.shape[0]
+    # colchunks = np.zeros((ncolchunks, 2), dtype=int)
+    # colchunks[:, 0] = (np.arange(ncolchunks) * colstep).astype(int)
+    # colchunks[:, 1] = colchunks[:, 0] + colstepov - int(col_overlap)
+    # colchunks[:, 0] -= col_overlap
+    # colchunks[0, 0] = 0;
+
+    colchunks = []
+    col_overlap = int(col_overlap)
+    for c in range(ncolchunks):
+      cchunk = [int(c * colstep) - col_overlap, int(c * colstep + colstepov) - col_overlap]
+      colchunks.append(cchunk)
+      if cchunk[1] > ncol:
+        break
+
+    ncolchunks = len(colchunks)
+    colchunks = np.array(colchunks, dtype=int)
+    colchunks[0, 0] = 0
     colchunks[-1, 1] = ncol
 
+    if ncolchunks > 1:
+      colchunks[-1, 0] = max(0, colchunks[-2, 1] - col_overlap)
+
     colchunks += col_offset
 
-    # colproc = np.zeros((ncolchunks, 2), dtype=int)
-    # if ncolchunks > 1:
-    #   colproc[1:, 0] = col_overlap
-    # if ncolchunks > 1:
-    #   colproc[0:, 1] = colchunks[:, 1] - colchunks[:, 0] - col_overlap
-    # colproc[-1, 1] = colchunks[-1, 1] - colchunks[-1, 0]
-
-    # rowchunks = np.round(np.arange(nrowchunks + 1) * nrow / nrowchunks).astype(int)
-    rowchunks = np.zeros((nrowchunks, 2), dtype=int)
-    rowchunks[:, 0] = (np.arange(nrowchunks) * rowstep).astype(int)
-    rowchunks[:, 1] = rowchunks[:, 0] + rowstepov - int(row_overlap)
-    rowchunks[:, 0] -= row_overlap
-    rowchunks[0, 0] = 0;
-
-    for i in range(nrowchunks - 1):
-      if rowchunks[i + 1, 0] >= nrow:
-        rowchunks = rowchunks[0:i + 1, :]
-
-    nrowchunks = rowchunks.shape[0]
+    # for i in range(ncolchunks - 1):
+    #   if colchunks[i + 1, 0] >= ncol:
+    #     colchunks = colchunks[0:i + 1, :]
+
+    rowchunks = []
+    row_overlap = int(row_overlap)
+    for r in range(nrowchunks):
+      rchunk = [int(r * rowstep) - row_overlap, int(r * rowstep + rowstepov) - row_overlap]
+      rowchunks.append(rchunk)
+      if rchunk[1] > nrow:
+        break
+
+    nrowchunks = len(rowchunks)
+    rowchunks = np.array(rowchunks, dtype=int)
+    rowchunks[0, 0] = 0
     rowchunks[-1, 1] = nrow
 
-    rowchunks += row_offset
+    if nrowchunks > 1:
+      rowchunks[-1, 0] = max(0, rowchunks[-2, 1] - row_overlap)
 
-    # rowproc = np.zeros((nrowchunks, 2), dtype=int)
-    # if nrowchunks > 1:
-    #   rowproc[1:, 0] = row_overlap
-    # if nrowchunks > 1:
-    #   rowproc[0:, 1] = rowchunks[:, 1] - rowchunks[:, 0] - row_overlap
-    # rowproc[-1, 1] = rowchunks[-1, 1] - rowchunks[-1, 0]
+    rowchunks += row_offset
 
     return ncolchunks, nrowchunks, colchunks, rowchunks
 

diff --git a/pyebsdindex/opencl/nlpar_cl.py b/pyebsdindex/opencl/nlpar_cl.py
@@ -106,7 +106,7 @@ def loptfunc(lam, d2, tw, dthresh):
     lamopt_values = []
 
     sigma, d2, n2 = self.calcsigma(nn=1, saturation_protect=saturation_protect, automask=automask, normalize_d=True,
-                                   return_nndist=True)
+                                   return_nndist=True, **kwargs)
 
     #sigmapad = np.pad(sigma, 1, mode='reflect')
     #d2normcl(d2, n2, sigmapad)
@@ -133,7 +133,7 @@ def loptfunc(lam, d2, tw, dthresh):
     return lamopt_values.flatten()
 
 
-  def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=False, gpu_id = None, **kwargs):
+  def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=False, gpu_id = None, verbose = 2, **kwargs):
     self.sigmann = nn
     if self.sigmann > 7:
       print("Sigma optimization search limited to a search radius <= 7")
@@ -222,7 +222,8 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa
     #count_local = cl.LocalMemory(nnn*npadmx*4)
     count_local = cl.Buffer(ctx, mf.READ_WRITE, size=int(mxchunk * nnn * 4))
     countchunk = np.zeros((mxchunk, nnn), dtype=np.float32)
-
+    ndone = 0
+    nchunks = int(chunks[1] * chunks[0])
     for rowchunk in range(chunks[1]):
       rstart = chunks[3][rowchunk, 0]
       rend = chunks[3][rowchunk, 1]
@@ -289,7 +290,9 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=Fa
         countnn[rstart:rend, cstart:cend] = countchunk[0:int(ncolchunk*nrowchunk), :].reshape(nrowchunk, ncolchunk, nnn)
         dist[rstart:rend, cstart:cend] = distchunk[0:int(ncolchunk*nrowchunk), :].reshape(nrowchunk, ncolchunk, nnn)
         sigma[rstart:rend, cstart:cend] = np.minimum(sigma[rstart:rend, cstart:cend], sigmachunk)
-
+        if verbose >= 2:
+          print("tiles complete: ", ndone, "/", nchunks, sep='', end='\r')
+        ndone +=1
     dist_local.release()
     count_local.release()
     datapad_gpu.release()

diff --git a/pyebsdindex/opencl/nlpar_clray.py b/pyebsdindex/opencl/nlpar_clray.py
@@ -479,7 +479,7 @@ def calcnlpar_clray(self, searchradius=None, lam = None, dthresh = None, saturat
                                          rescale = rescale,
                                          gpu_id= gpu_id)
 
-    target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//3
+    target_mem = clparams.gpu[gpu_id].max_mem_alloc_size//6
     max_mem = clparams.gpu[gpu_id].global_mem_size*0.4
     if target_mem*ngpuwrker > max_mem:
       target_mem = max_mem/ngpuwrker