Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenCL] include type conversion in preprocess #1960

Merged
merged 18 commits into from
Oct 4, 2023
Merged
2 changes: 1 addition & 1 deletion doc/source/usage/tutorial/integrate2d.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.0"
}
},
"nbformat": 4,
Expand Down
8 changes: 3 additions & 5 deletions pyFAI/azimuthalIntegrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "29/09/2023"
__date__ = "03/10/2023"
__status__ = "stable"
__docformat__ = 'restructuredtext'

Expand Down Expand Up @@ -377,18 +377,16 @@ def setup_sparse_integrator(self,
Unit can also be a 2-tuple in the case of a 2D integration
"""
if isinstance(unit, (list, tuple)) and len(unit) == 2:
unit0, unit1 = unit
unit0, unit1 = tuple(units.to_unit(u) for u in unit)
else:
unit0 = unit
unit0 = units.to_unit(unit)
unit1 = units.CHI_DEG
if scale and pos0_range:
unit0 = units.to_unit(unit0)
pos0_scale = unit0.scale
pos0_range = tuple(pos0_range[i] / pos0_scale for i in (0, -1))
if "__len__" in dir(npt) and len(npt) == 2:
int2d = True
if scale and pos1_range:
unit1 = units.to_unit(unit1)
pos1_scale = unit1.scale
pos1_range = tuple(pos1_range[i] / pos1_scale for i in (0, -1))
else:
Expand Down
14 changes: 13 additions & 1 deletion pyFAI/opencl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,13 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "14/11/2022"
__date__ = "03/10/2023"
__status__ = "stable"

import os
import logging
import platform
import numpy
from ..utils.decorators import deprecated
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -73,3 +74,14 @@ def get_x87_volatile_option(ctx):
return "-DX87_VOLATILE=volatile"
else:
return ""


def dtype_converter(dtype):
"convert a numpy dtype as a int8"
dtype = numpy.dtype(dtype)
if numpy.issubdtype(dtype, numpy.signedinteger):
return numpy.int8(-dtype.itemsize)
elif numpy.issubdtype(dtype, numpy.unsignedinteger):
return numpy.int8(dtype.itemsize)
else:
return numpy.int8(8*dtype.itemsize)
127 changes: 82 additions & 45 deletions pyFAI/opencl/azim_csr.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions pyFAI/opencl/azim_hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"""
__author__ = "Jérôme Kieffer"
__license__ = "MIT"
__date__ = "26/09/2023"
__date__ = "29/09/2023"
__copyright__ = "2012-2021, ESRF, Grenoble"
__contact__ = "[email protected]"

Expand Down Expand Up @@ -66,7 +66,8 @@ class OCL_Histogram1d(OpenclProcessing):
buffers = [BufferDescription("output4", 4, numpy.float32, mf.READ_WRITE),
BufferDescription("radial", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("azimuthal", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("tmp", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("image_raw", 1, numpy.int64, mf.READ_ONLY),
BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("variance", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("dark", 1, numpy.float32, mf.READ_WRITE),
Expand Down
48 changes: 27 additions & 21 deletions pyFAI/opencl/azim_lut.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Project: Azimuthal integration
# https://github.com/silx-kit/pyFAI
#
# Copyright (C) 2012-2022 European Synchrotron Radiation Facility, Grenoble, France
# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer ([email protected])
#
Expand All @@ -27,25 +27,25 @@

__author__ = "Jérôme Kieffer"
__license__ = "MIT"
__date__ = "26/09/2023"
__date__ = "04/10/2023"
__copyright__ = "2012-2021, ESRF, Grenoble"
__contact__ = "[email protected]"

import logging
from collections import OrderedDict
import numpy
from . import pyopencl
from . import pyopencl, dtype_converter
from ..utils import calc_checksum
if pyopencl:
mf = pyopencl.mem_flags
else:
raise ImportError("pyopencl is not installed")
from ..containers import Integrate1dtpl, Integrate2dtpl, ErrorModel
from . import processing, OpenclProcessing
from . import get_x87_volatile_option
EventDescription = processing.EventDescription
BufferDescription = processing.BufferDescription


logger = logging.getLogger(__name__)


Expand All @@ -57,7 +57,8 @@ class OCL_LUT_Integrator(OpenclProcessing):
BLOCK_SIZE = 32
buffers = [BufferDescription("output", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("output4", 4, numpy.float32, mf.READ_WRITE),
BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("tmp", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("image_raw", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("variance", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("dark", 1, numpy.float32, mf.READ_WRITE),
Expand Down Expand Up @@ -150,6 +151,7 @@ def __init__(self, lut, image_size, checksum=None,
BufferDescription("merged8", (self.bins, 8), numpy.float32, mf.WRITE_ONLY),
]
self.allocate_buffers()
self.buffer_dtype = {i.name:numpy.dtype(i.dtype) for i in self.buffers}
self.compile_kernels()
self.set_kernel_arguments()
if self.device.type == "CPU":
Expand Down Expand Up @@ -208,12 +210,7 @@ def compile_kernels(self, kernel_file=None):
compile_options = "-D NBINS=%i -D NIMAGE=%i -D NLUT=%i -D ON_CPU=%i" % \
(self.bins, self.size, self.lut_size, int(self.device.type == "CPU"))

try:
default_compiler_options = self.get_compiler_options(x87_volatile=True)
except AttributeError: # Silx version too old
logger.warning("Please upgrade to silx v0.10+")
default_compiler_options = get_x87_volatile_option(self.ctx)

default_compiler_options = self.get_compiler_options(x87_volatile=True)
if default_compiler_options:
compile_options += " " + default_compiler_options
OpenclProcessing.compile_kernels(self, kernels, compile_options)
Expand Down Expand Up @@ -276,7 +273,6 @@ def set_kernel_arguments(self):
("delta_dummy", numpy.float32(0)),
("normalization_factor", numpy.float32(1.0)),
("output4", self.cl_mem["output4"])))

self.cl_kernel_args["lut_integrate4"] = OrderedDict((("output4", self.cl_mem["output4"]),
("lut", self.cl_mem["lut"]),
("empty", numpy.float32(self.empty)),
Expand All @@ -300,9 +296,11 @@ def send_buffer(self, data, dest, checksum=None):

:param data: numpy array with data
:param dest: name of the buffer as registered in the class
:param convert: if True (default) convert dtype on GPU, if false, leave as it is.
:return: the actual buffer where the data were sent
"""

dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
dest_type = self.buffer_dtype[dest]
events = []
if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
Expand Down Expand Up @@ -400,7 +398,7 @@ def integrate_legacy(self, data, dummy=None, delta_dummy=None,
if not solidangle_checksum:
solidangle_checksum = calc_checksum(solidangle, safe)
if solidangle_checksum != self.on_device["solidangle"]:
self.send_buffer(solidangle, "solidangle", flat_checksum)
self.send_buffer(solidangle, "solidangle", solidangle_checksum)
else:
do_solidangle = numpy.int8(0)
kw_corr['do_solidangle'] = do_solidangle
Expand Down Expand Up @@ -478,7 +476,7 @@ def integrate_ng(self, data, dark=None, dummy=None, delta_dummy=None,
polarization_checksum=None, absorption_checksum=None, dark_variance_checksum=None,
safe=True,
normalization_factor=1.0,
out_avgint=None, out_std=None, out_sem=None, out_merged=None):
out_avgint=None, out_sem=None, out_std=None, out_merged=None):
"""
Before performing azimuthal integration with proper variance propagation, the preprocessing is:

Expand Down Expand Up @@ -508,19 +506,24 @@ def integrate_ng(self, data, dark=None, dummy=None, delta_dummy=None,
:param safe: if True (default) compares arrays on GPU according to their checksum, unless, use the buffer location is used
:param preprocess_only: return the dark subtracted; flat field & solidangle & polarization corrected image, else
:param normalization_factor: divide raw signal by this value
:param out_avgint: destination array or pyopencl array for sum of all data
:param out_sem: destination array or pyopencl array for sum of the number of pixels
:param out_avgint: destination array or pyopencl array for average intensity
:param out_sem: destination array or pyopencl array for standard deviation (of mean)
:param out_std: destination array or pyopencl array for standard deviation (of pixels)
:param out_merged: destination array or pyopencl array for averaged data (float8!)
:return: large namedtuple with out_avgint, out_sem, out_merged ...
"""
events = []
with self.sem:
kernel_correction_name = "corrections4"
corrections4 = self.kernels.corrections4
kw_corr = self.cl_kernel_args[kernel_correction_name]
self.send_buffer(data, "image")

wg = self.workgroup_size
wdim_bins = (self.bins + wg[0] - 1) & ~(wg[0] - 1),
memset = self.kernels.memset_out(self.queue, wdim_bins, wg, *list(self.cl_kernel_args["memset_ng"].values()))
events.append(EventDescription("memset_ng", memset))
kw_corr = self.cl_kernel_args["corrections4"]

kw_int = self.cl_kernel_args["lut_integrate4"]

if dummy is not None:
Expand Down Expand Up @@ -602,8 +605,8 @@ def integrate_ng(self, data, dark=None, dummy=None, delta_dummy=None,
do_absorption = numpy.int8(0)
kw_corr["do_absorption"] = do_absorption

ev = self.kernels.corrections4(self.queue, self.wdim_data, self.workgroup_size, *list(kw_corr.values()))
events.append(EventDescription("corrections4", ev))
ev = corrections4(self.queue, self.wdim_data, self.workgroup_size, *list(kw_corr.values()))
events.append(EventDescription(kernel_correction_name, ev))

kw_int["empty"] = dummy
integrate = self.kernels.lut_integrate4(self.queue, wdim_bins, self.workgroup_size, *kw_int.values())
Expand All @@ -615,24 +618,27 @@ def integrate_ng(self, data, dark=None, dummy=None, delta_dummy=None,
merged = None
else:
merged = out_merged.data

if out_avgint is None:
avgint = numpy.empty(self.bins, dtype=numpy.float32)
elif out_avgint is False:
avgint = None
else:
avgint = out_avgint.data

if out_sem is None:
sem = numpy.empty(self.bins, dtype=numpy.float32)
elif out_sem is False:
sem = None
else:
sem = out_sem.data

if out_std is None:
std = numpy.empty(self.bins, dtype=numpy.float32)
elif out_std is False:
std = None
else:
std = out_sem.data
std = out_std.data

ev = pyopencl.enqueue_copy(self.queue, avgint, self.cl_mem["averint"])
events.append(EventDescription("copy D->H avgint", ev))
Expand Down
33 changes: 19 additions & 14 deletions pyFAI/opencl/peak_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# https://github.com/silx-kit/pyFAI
#
#
# Copyright (C) 2014-2022 European Synchrotron Radiation Facility, Grenoble, France
# Copyright (C) 2014-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer ([email protected])
#
Expand All @@ -29,7 +29,7 @@

__authors__ = ["Jérôme Kieffer"]
__license__ = "MIT"
__date__ = "31/01/2023"
__date__ = "04/10/2023"
__copyright__ = "2014-2023, ESRF, Grenoble"
__contact__ = "[email protected]"

Expand All @@ -40,17 +40,17 @@
from ..containers import SparseFrame, ErrorModel
from ..utils import EPS32
from .azim_csr import OCL_CSR_Integrator, BufferDescription, EventDescription, mf, calc_checksum, pyopencl, OpenclProcessing
from . import get_x87_volatile_option
from . import kernel_workgroup_size
from . import get_x87_volatile_option, kernel_workgroup_size, dtype_converter

logger = logging.getLogger(__name__)


class OCL_PeakFinder(OCL_CSR_Integrator):
BLOCK_SIZE = 1024 # unlike in OCL_CSR_Integrator, here we need larger blocks
buffers = [BufferDescription("output", 1, numpy.float32, mf.WRITE_ONLY),
buffers = [BufferDescription("output", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("output4", 4, numpy.float32, mf.READ_WRITE),
BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("tmp", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("image_raw", 1, numpy.int64, mf.READ_WRITE),
BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("variance", 1, numpy.float32, mf.READ_WRITE),
BufferDescription("dark", 1, numpy.float32, mf.READ_WRITE),
Expand Down Expand Up @@ -111,12 +111,12 @@ def __init__(self, lut, image_size, checksum=None,
profile=profile, extra_buffers=extra_buffers)

if mask is None:
self.cl_kernel_args["corrections4"]["do_mask"] = numpy.int8(0)
self.cl_kernel_args["corrections4a"]["do_mask"] = numpy.int8(0)
self.mask = None
else:
self.mask = numpy.ascontiguousarray(mask, numpy.int8)
self.send_buffer(self.mask, "mask")
self.cl_kernel_args["corrections4"]["do_mask"] = numpy.int8(1)
self.cl_kernel_args["corrections4a"]["do_mask"] = numpy.int8(1)

if self.bin_centers is None:
raise RuntimeError("1D bin center position is mandatory")
Expand Down Expand Up @@ -224,14 +224,19 @@ def _sigma_clip(self, data, dark=None, dummy=None, delta_dummy=None,
:return: list of event to wait for.
"""
events = []
self.send_buffer(data, "image")
# convert = (data.dtype.itemsize>4)
# self.send_buffer(data, "image", convert=convert)
kernel_correction_name = "corrections4a"
corrections4 = self.kernels.corrections4a
kw_corr = self.cl_kernel_args[kernel_correction_name]
kw_corr["image"] = self.send_buffer(data, "image", convert=False)
kw_corr["dtype"] = numpy.int8(32) if data.dtype.itemsize>4 else dtype_converter(data.dtype)

wg = max(self.workgroup_size["memset_ng"])
wdim_bins = (self.bins + wg - 1) & ~(wg - 1),
memset = self.kernels.memset_out(self.queue, wdim_bins, (wg,), *list(self.cl_kernel_args["memset_ng"].values()))
events.append(EventDescription("memset_ng", memset))

# Prepare preprocessing
kw_corr = self.cl_kernel_args["corrections4"]
if dummy is not None:
do_dummy = numpy.int8(1)
dummy = numpy.float32(dummy)
Expand Down Expand Up @@ -314,10 +319,10 @@ def _sigma_clip(self, data, dark=None, dummy=None, delta_dummy=None,
kw_int = self.cl_kernel_args["csr_sigma_clip4"]
kw_corr["error_model"] = kw_int["error_model"] = numpy.int8(error_model.value)

wg = max(self.workgroup_size["corrections4"])
wg = max(self.workgroup_size[kernel_correction_name])
wdim_data = (self.size + wg - 1) & ~(wg - 1),
ev = self.kernels.corrections4(self.queue, wdim_data, (wg,), *list(kw_corr.values()))
events.append(EventDescription("corrections", ev))
ev = corrections4(self.queue, wdim_data, (wg,), *list(kw_corr.values()))
events.append(EventDescription(kernel_correction_name, ev))

# Prepare sigma-clipping
kw_int["cutoff"] = numpy.float32(cutoff_clip)
Expand Down
Loading
Loading