Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script for gain selection (R0 to R0G), for data taken in ADHF format … #1198

Merged
merged 38 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
d78320b
Script for gain selection (R0 to R0G), for data taken in ADHF format …
moralejo Dec 19, 2023
58d0935
Removed unused stuff
moralejo Dec 19, 2023
ec4ab2c
Separate first loop in a function
moralejo Dec 20, 2023
df8483c
Changes to allow processing EvB6 data
moralejo Dec 21, 2023
b80a404
Limit scipy to version before 1.12 which introduces a breaking change
gabemery Jan 22, 2024
180c987
unfreeze sphinx version
morcuended Jan 22, 2024
30f0a2d
Deal with missing modules in modifier.py
moralejo Jan 22, 2024
335b23f
Added protozfits
moralejo Jan 23, 2024
7a37ef2
Address various suggestions
moralejo Jan 23, 2024
20316cd
Update test_lstchain_scripts.py
moralejo Jan 25, 2024
d48e3ce
Path to needed files
moralejo Jan 25, 2024
dbff077
Update test_lstchain_scripts.py
moralejo Jan 25, 2024
cf3c201
Update test_lstchain_scripts.py
moralejo Jan 25, 2024
b054751
Now the script works only for EvB6 data (CTAR1). No need for Drive lo…
moralejo Jan 26, 2024
015c000
Removed unused variables
moralejo Jan 26, 2024
a1957d9
Proper naming of the data format
moralejo Jan 26, 2024
552c83d
Changed name of test file
moralejo Jan 26, 2024
d3211b9
Update test_lstchain_scripts.py
moralejo Feb 20, 2024
465ee28
Merge branch 'main' into gain_selector
moralejo Feb 20, 2024
1888038
Baseline offset read from file
moralejo Feb 20, 2024
d72d37c
Update test_lstchain_scripts.py
moralejo Feb 20, 2024
6bc41e0
Update lstchain_r0_to_r0g.py
moralejo Feb 20, 2024
1235d47
Update lstchain_r0_to_r0g.py
moralejo Feb 20, 2024
68eb0ee
Update lstchain_r0_to_r0g.py
moralejo Feb 20, 2024
10f9402
Update lstchain_r0_to_r0g.py
moralejo Feb 20, 2024
008fbf1
Use str instead of Path in some calls.
moralejo Feb 20, 2024
e1b4fb0
Fix issue with setting input filenames for the different streams
moralejo Feb 21, 2024
6687283
WIP: removed unnecessary first loop over files.
moralejo Feb 21, 2024
6efb3e5
Solved mess about EventType and its numerical values
moralejo Feb 21, 2024
0bd82ee
changes in logger
moralejo Feb 21, 2024
b5fce9e
Various fixes. Better logging.
moralejo Feb 21, 2024
2824d2c
Better use of parser
moralejo Feb 22, 2024
2796700
Log file to output directory
moralejo Feb 22, 2024
628a242
Merge branch 'gain_selector' of github.com:cta-observatory/cta-lstcha…
moralejo Feb 22, 2024
a132686
Fixed indents
moralejo Feb 22, 2024
3b14eb7
Preserve higher bits of pixel status in case we ever use them
moralejo Feb 22, 2024
f6812a9
Merge branch 'gain_selector' of github.com:cta-observatory/cta-lstcha…
moralejo Feb 22, 2024
39a5027
Update protozfits version specification in setup.py
moralejo Feb 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies:
- joblib~=1.2.0
- toml
- protobuf=3.20
- protozfits=2.2
moralejo marked this conversation as resolved.
Show resolved Hide resolved
moralejo marked this conversation as resolved.
Show resolved Hide resolved
- pyparsing
- scipy~=1.11.4
- scikit-learn=1.2
Expand Down
2 changes: 2 additions & 0 deletions lstchain/image/modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,15 @@ def calculate_noise_parameters(simtel_filename, data_dl1_filename,

# Identify noisy pixels, likely containing stars - we want to adjust MC to
# the average diffuse NSB across the camera

data_median_std_ped_pe = np.nanmedian(data_HG_ped_std_pe[good_pixels])
data_std_std_ped_pe = np.nanstd(data_HG_ped_std_pe[good_pixels])
log.info('\nReal data:')
log.info(f' Number of bad pixels (from calibration): {bad_pixels.sum()}')
log.info(f' Median of FF pixel charge: '
f'{np.nanmedian(data_HG_FF_mean_pe[good_pixels]):.3f} p.e.')
log.info(f' Median across camera of good pixels\' pedestal std '

f'{data_median_std_ped_pe:.3f} p.e.')
brightness_limit = data_median_std_ped_pe + 3 * data_std_std_ped_pe
too_bright_pixels = (data_HG_ped_std_pe > brightness_limit)
Expand Down
236 changes: 236 additions & 0 deletions lstchain/scripts/lstchain_r0_to_r0g.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
#!/usr/bin/env python
moralejo marked this conversation as resolved.
Show resolved Hide resolved
"""
Script to apply gain selection to the raw fits.fz produced by EvB6 in CTAR1
format (aka R1v1).

Gain selection is only applied to shower events (not to interleaved pedestal
and flatfield)

It uses heuristic identification of the interleaved flatfield (FF) events (
given the occasional problems we have with the FF event tagging)
moralejo marked this conversation as resolved.
Show resolved Hide resolved

"""
import logging
import protozfits
import argparse
import sys
import re

from pathlib import Path
from ctapipe.containers import EventType

import lstchain.paths as paths
import numpy as np
from contextlib import ExitStack

parser = argparse.ArgumentParser(description="Gain selector program (R0 to "
"R0G)")
parser.add_argument('-f', '--R0-file', dest='input_file',
type=str, help='Input R0 file name (of stream 1)')

parser.add_argument('-o', '--output-dir', dest='output_dir',
type=str, default='./',
help='Output directory')

parser.add_argument('--log', dest='log_file',
type=str, default=None,
help='Log file name')

parser.add_argument('--no-flatfield-heuristic', action='store_false',
dest="use_flatfield_heuristic",
help=("If given, do *not* identify flatfield events"
" heuristically from the raw data. "
"Trust event_type."))

# Range of waveform to be checked (for gain selection & heuristic FF
# identification)
SAMPLE_START = 3
SAMPLE_END = 39

# Level of high gain (HG) required for switching to low gain (LG)
THRESHOLD = 3900

# Events for which gain selection will be applied:
EVENT_TYPES_TO_REDUCE = [EventType.SUBARRAY, EventType.UNKNOWN]

def main():
args = parser.parse_args()

input_file = args.input_file
output_dir = args.output_dir
use_flatfield_heuristic = args.use_flatfield_heuristic

log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

log_file = args.log_file
runinfo = paths.parse_r0_filename(input_file)

if log_file is None:
log_file = output_dir
log_file += f'/R0_to_R0g_Run{runinfo.run:05d}.{runinfo.subrun:04d}.log'

formatter = logging.Formatter('%(asctime)s - '
'%(levelname)s - %(message)s',
'%Y-%m-%d %H:%M:%S')
handler = logging.FileHandler(log_file, mode='w')
handler.setFormatter(formatter)
logging.getLogger().addHandler(handler)

# Loop over the files (4 streams) to perform the gain selection:

input_stream_names = [Path(Path(input_file).parent,
moralejo marked this conversation as resolved.
Show resolved Hide resolved
re.sub("LST-1...Run",
f"LST-1.{id_stream+1}.Run",
Path(input_file).name))
for id_stream in range(4)]

output_stream_names = [Path(output_dir, Path(inputsn).name)
for inputsn in input_stream_names]

input_streams = []
for name in input_stream_names:
input_streams.append(protozfits.File(str(name), pure_protobuf=True))

try:
camera_config = input_streams[0].CameraConfiguration[0]
except (AttributeError, IndexError):
log.error('CameraConfiguration not found! Is this R1v1 data?')
sys.exit(1)

Check warning on line 99 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L97-L99

Added lines #L97 - L99 were not covered by tests

num_pixels = camera_config.num_pixels
num_samples = camera_config.num_samples_nominal

# Counters to keep track of how many FF-like events are
# tagged as FF, and how many are not (and vice-versa):
num_FF_like_with_FF_type = 0
num_FF_like_with_no_FF_type = 0
num_FF_unlike_with_FF_type = 0
num_events = 0

with ExitStack() as stack:
for i, name in enumerate(output_stream_names):
header = input_streams[i].Events.header
n_tiles = header["NAXIS2"]
rows_per_tile = header["ZTILELEN"]

stream = stack.enter_context(protozfits.ProtobufZOFits(
n_tiles=n_tiles,
rows_per_tile=rows_per_tile,
compression_block_size_kb=64*1024,
defaul_compression="lst"))
stream.open(str(name))

stream.move_to_new_table("DataStream")
stream.write_message(input_streams[i].DataStream[0])
moralejo marked this conversation as resolved.
Show resolved Hide resolved
moralejo marked this conversation as resolved.
Show resolved Hide resolved

stream.move_to_new_table("CameraConfiguration")
stream.write_message(input_streams[i].CameraConfiguration[0])

stream.move_to_new_table("Events")

wf_offset = input_streams[i].DataStream[0].waveform_offset

for event in input_streams[i].Events:
# skip corrupted events:
if event.event_id == 0:
continue

Check warning on line 137 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L137

Added line #L137 was not covered by tests
num_events += 1

num_gains = event.num_channels
if num_gains != 2:
log.error('You are attempting gain selection on '

Check warning on line 142 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L142

Added line #L142 was not covered by tests
'data with only one gain!')
sys.exit(1)

Check warning on line 144 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L144

Added line #L144 was not covered by tests

wf = protozfits.any_array_to_numpy(event.waveform)
wf = wf.reshape((num_gains, num_pixels, num_samples))
hg = wf[0]
lg = wf[1]

evtype = EventType(event.event_type)
evtype_heuristic = get_event_type(hg, wf_offset, evtype)

if evtype == EventType.FLATFIELD:
if evtype_heuristic == EventType.FLATFIELD:
num_FF_like_with_FF_type += 1
else:
num_FF_unlike_with_FF_type += 1

Check warning on line 158 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L158

Added line #L158 was not covered by tests
elif evtype_heuristic == EventType.FLATFIELD:
num_FF_like_with_no_FF_type += 1

Check warning on line 160 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L160

Added line #L160 was not covered by tests

if use_flatfield_heuristic:
evtype = evtype_heuristic

if evtype in EVENT_TYPES_TO_REDUCE:
# Find pixels with HG above gain switch threshold:
use_lg = (np.max(hg[:, SAMPLE_START:SAMPLE_END], axis=1)
> THRESHOLD)
new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected
event.waveform.data = new_wf.tobytes()
event.num_channels = 1 # Just one gain stored!

pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
# Set to 0 the status bit of the removed gain:
new_status = np.where(use_lg,
pixel_status & 0b11111011,
pixel_status & 0b11110111)
event.pixel_status.data = new_status.tobytes()

stream.write_message(event)

stream.close()
input_streams[i].close()


log.info('Number of processed events: %d', num_events)
if num_FF_like_with_FF_type > 0:
log.info('FF-like events tagged as FF: %d',
num_FF_like_with_FF_type)
else:
log.warn('FF-like events tagged as FF: %d !!',

Check warning on line 191 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L191

Added line #L191 was not covered by tests
num_FF_like_with_FF_type)

log.info('FF-like events not tagged as FF: %d',
num_FF_like_with_no_FF_type)

log.info('FF-unlike events tagged as FF: %d',
num_FF_unlike_with_FF_type)


num_FF_like = num_FF_like_with_no_FF_type + num_FF_like_with_FF_type

# If a relevant fraction of FF-like events were not tagged as FF...:
max_frac = 0.1
if ((num_FF_like_with_no_FF_type / num_FF_like > max_frac) &
(use_flatfield_heuristic == False)):
log.warn('More than %d percent of FF-like events '

Check warning on line 207 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L207

Added line #L207 was not covered by tests
'have wrong event_type!', int(100*max_frac))
log.warn('You should use heuristic identification of FF events!')

Check warning on line 209 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L209

Added line #L209 was not covered by tests
else:
log.info('R0 to R0G conversion finished successfully!')

def get_event_type(wf_hg, offset, evtype):

# For heuristic flat field identification (values refer to
# baseline-subtracted HG integrated waveforms):
MIN_FLATFIELD_ADC = 3000
MAX_FLATFIELD_ADC = 12000
MIN_FLATFIELD_PIXEL_FRACTION = 0.8

evtype_heuristic = evtype

# pixel-wise integral:
wf_hg_sum = np.sum(wf_hg - offset, axis=1)
ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
(wf_hg_sum < MAX_FLATFIELD_ADC))

# Check fraction of pixels with HG in "FF-like range"
if ff_pix.sum() / len(ff_pix) > MIN_FLATFIELD_PIXEL_FRACTION:
# Looks like a FF event:
evtype_heuristic = EventType.FLATFIELD
elif evtype == EventType.FLATFIELD:
# Does not look like a FF, but was tagged as such:
evtype_heuristic = EventType.UNKNOWN

Check warning on line 234 in lstchain/scripts/lstchain_r0_to_r0g.py

View check run for this annotation

Codecov / codecov/patch

lstchain/scripts/lstchain_r0_to_r0g.py#L234

Added line #L234 was not covered by tests

return evtype_heuristic
25 changes: 25 additions & 0 deletions lstchain/scripts/tests/test_lstchain_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from astropy.time import Time
from ctapipe.instrument import SubarrayDescription
from ctapipe.io import read_table
from ctapipe.io import EventSource
from ctapipe.containers import EventType


from lstchain.io.config import get_srcdep_config, get_standard_config
from lstchain.io.io import (dl1_images_lstcam_key, dl1_params_lstcam_key,
Expand Down Expand Up @@ -83,6 +86,28 @@ def merged_simulated_dl1_file(simulated_dl1_file, temp_dir_simulated_files):
def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
assert simulated_dl1_file.is_file()

@pytest.mark.private_data
def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
input_file = test_data / "real/R0/20231214/LST-1.1.Run16102.0000_first50" \
".fits.fz"
output_dir = temp_dir_observed_files / "R0G"
output_dir.mkdir()
run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)
output_file = output_dir / input_file.name
assert output_file.is_file()
moralejo marked this conversation as resolved.
Show resolved Hide resolved

src = EventSource(input_url=output_file)
src.pointing_information = False
src.trigger_information = False
src.apply_drs4_corrections = False
# Check number of gains for first event of each type:
for evtype, ngains in zip([EventType.FLATFIELD, EventType.SKY_PEDESTAL,
EventType.SUBARRAY], [2, 2, 1]):
for event in src:
if event.trigger.event_type == evtype:
break
assert(event.r0.tel[1].waveform.shape[0] == ngains)

@pytest.mark.private_data
def test_lstchain_data_r0_to_dl1(observed_dl1_files):
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def find_scripts(script_dir, prefix):
'scikit-learn~=1.2',
'tables',
'toml',
'protozfits>=2.2,<3',
'pymongo',
'pyparsing',
'setuptools_scm',
Expand Down
Loading