#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python
import os
import json
import sys
import luigi
from cluster_tools import MulticutSegmentationWorkflow


def run_mc(input_path, tmp_folder, max_jobs,
           n_scales=1, have_watershed=True, target='local',
           from_affinities=False, invert_inputs=False):
""" Run multicut on cremi sample or similar data.
You can obtain the data used for this examle from
https://drive.google.com/file/d/1E_Wpw9u8E4foYKk7wvx5RPSWvg_NCN7U/view?usp=sharing
Args:
input_path: n5 or hdf5 container with input data
(boundary maps or affinity maps)
tmp_folder: temporary folder to store job files
max_jobs: maximal number of jobs
n_scales: number of scales for hierarchical solver (0 will perform vanilla multicut)
have_watershed: flag to indicate if the watershed is computed already
target: target platform, either 'local' (computation on local host),
'slurm' (cluster running slurm)
or 'lsf' (cluster running lsf)
from_affinities: whether to use affinity maps or boundary maps
invert_inputs: whether to invert the inputs; this needs to be set to true
if HIGH boundary evidence correponds to LOWER values in boundary /
affinity maps
"""
# path with the watershed data, can be the same as input_path
ws_path = input_path
# key for input, and watershed
input_key = 'volumes/affinities'
ws_key = 'volumes/segmentation/watershed'
# path to n5 or hdf5 container to which the output segmentation should be written
# can be the same as input_path
out_path = input_path
out_key = 'volumes/segmentation/multicut'
# path and key for mask
# mask can be used to exclude parts of the volume from segmentation
# leave blank if you don't have a mask
mask_path = ''
mask_key = ''
# n5 container for intermediate results like graph-structure or features
exp_path = './sampleA_exp.n5'
# config folder holds configurations for workflow steps stored as json
configs = MulticutSegmentationWorkflow.get_config()
config_folder = 'configs'
os.makedirs(config_folder, exist_ok=True)
# global workflow config
# python interpreter of conda environment with dependencies, see
    # https://github.com/constantinpape/cluster_tools/blob/master/environment.yml
# shebang = "#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python"
shebang = f"#! {sys.executable}"
# block shape used for parallelization
block_shape = [30, 256, 256]
global_config = configs['global']
global_config.update({'shebang': shebang, 'block_shape': block_shape})
with open('./configs/global.config', 'w') as f:
json.dump(global_config, f)
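    # for orientation (an illustration, the exact content depends on the defaults
    # returned by get_config()): configs/global.config will now contain roughly
    #   {"shebang": "#! /path/to/python", "block_shape": [30, 256, 256], ...}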
# config for the watershed calculation
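    # (rough meaning of the parameters set below, based on the usual defaults of the
    # cluster_tools watershed task: 'threshold' binarizes the boundary map before the
    # distance transform that generates the seeds; the *_2d flags run the distance
    # transform and watershed per 2d slice, which suits the anisotropic cremi data)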
ws_config = configs['watershed']
ws_config.update({'threshold': 0.25, 'apply_ws_2d': True, 'apply_dt_2d': True})
if from_affinities:
ws_config.update({'channel_begin': 0, 'channel_end': 3, 'agglomerate_channels': 'max'})
with open('./configs/watershed.config', 'w') as f:
json.dump(ws_config, f)
# config for edge feature calculation
feat_config = configs['block_edge_features']
    # specify the offsets that correspond to the affinity channels if the input is
    # an affinity map; here these are the three direct nearest-neighbor offsets
    # along z, y, x
if from_affinities:
feat_config.update({'offsets': [[-1, 0, 0], [0, -1, 0], [0, 0, -1]]})
with open('./configs/block_edge_features.config', 'w') as f:
json.dump(feat_config, f)
# config for converting edge probabilities to edge costs
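    # (roughly: 'weight_edges' scales the costs by the size of each edge, i.e. the
    # number of boundary voxels it covers, and 'invert_inputs' flips the edge
    # probabilities if high values correspond to low boundary evidence)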
costs_config = configs['probs_to_costs']
costs_config.update({'threads_per_job': max_jobs, 'weight_edges': True, 'invert_inputs': invert_inputs})
with open('./configs/probs_to_costs.config', 'w') as f:
json.dump(costs_config, f)
    # set the number of threads for the jobs that run multi-threaded
tasks = ['merge_sub_graphs', 'merge_edge_features', 'probs_to_costs',
'solve_subproblems', 'reduce_problem', 'solve_global']
for tt in tasks:
config = configs[tt]
config.update({'threads_per_job': max_jobs, 'mem_limit': 8})
with open('./configs/%s.config' % tt, 'w') as f:
json.dump(config, f)
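    # (note: 'mem_limit' is given in GB; for the 'slurm' or 'lsf' targets you would
    # typically also set a 'time_limit' here; this is an assumption about sensible
    # cluster settings, check the defaults returned by get_config() for your setup)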
luigi.build([MulticutSegmentationWorkflow(input_path=input_path, input_key=input_key,
ws_path=ws_path, ws_key=ws_key,
mask_path=mask_path, mask_key=mask_key,
problem_path=exp_path,
node_labels_key='node_labels',
output_path=out_path, output_key=out_key,
n_scales=n_scales,
config_dir=config_folder,
tmp_folder=tmp_folder,
target=target,
skip_ws=have_watershed,
max_jobs=max_jobs)], local_scheduler=True)
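

# a small convenience helper, added for illustration and not part of the original
# example: inspect the segmentation written by run_mc (assumes z5py and numpy,
# both of which are dependencies of cluster_tools)
def print_result_summary(out_path, out_key='volumes/segmentation/multicut'):
    """ Load the multicut segmentation from the n5 output container and
    print its shape and the number of segments. """
    import z5py
    import numpy as np
    with z5py.File(out_path, 'r') as f:
        seg = f[out_key][:]
    print('segmentation shape:', seg.shape)
    print('number of segments:', len(np.unique(seg)))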


if __name__ == '__main__':
# path = '/g/kreshuk/data/cremi/example/sampleA.n5'
path = "./sampleA.n5"
tmp_folder = './tmp_mc'
target = 'local'
max_jobs = 8
run_mc(path, tmp_folder, max_jobs, target=target, from_affinities=True)
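    # to run the same workflow on a slurm cluster instead (assuming the conda
    # environment is also available on the cluster nodes), you would call something like
    #   run_mc(path, './tmp_mc_slurm', max_jobs=100, target='slurm', from_affinities=True)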