-
Notifications
You must be signed in to change notification settings - Fork 34
/
Params.groovy
1567 lines (1316 loc) · 44.4 KB
/
Params.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package cz.siret.prank.program.params
import com.google.common.annotations.Beta
import com.google.common.collect.ImmutableSet
import cz.siret.prank.program.Main
import cz.siret.prank.program.PrankException
import cz.siret.prank.utils.CmdLineArgs
import cz.siret.prank.utils.Sutils
import groovy.transform.AutoClone
import groovy.transform.CompileDynamic
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
/**
* Holds all global parameters of the program.
*
* This file is also the main source of parameter description/documentation.
*
* Parameter annotations:
* @RuntimeParam ... Parameters related to program execution.
* @ModelParam ... Actual parameters of the algorithm, related to extracting features and calculating results.
* It is important that those parameters stay the same when training a model and then using it for inference.
* @ModelParam // training ... Model params used only in training phase but not during inference.
*/
@Slf4j
@AutoClone
@CompileStatic
class Params {
public static Params INSTANCE = new Params()
/**
 * Accessor for the shared parameter set stored in the static {@code INSTANCE} field.
 *
 * @return the Params object currently referenced by {@code INSTANCE}
 */
static Params getInst() {
    Params.INSTANCE
}
/**
* define this if you want dataset program parameters to be evaluated relative to this directory
* (set absolute path or path relative to install dir, null defaults to working dir)
*/
@RuntimeParam
String dataset_base_dir = null
/**
* all output of the program will be stored in subdirectories of this directory
* (set absolute path or path relative to install dir, null defaults to working dir)
* {version} is replaced by program version
*/
@RuntimeParam
String output_base_dir = null
/**
* Location of pre-trained serialized model.
* (set path relative to install_dir/models/)
*/
@RuntimeParam
String model = "default"
/**
* Random seed
*/
@RuntimeParam
int seed = 42
/**
* Parallel execution (processing datasets in parallel)
*/
@RuntimeParam
boolean parallel = true
/**
* Number of computing threads
*/
@RuntimeParam
int threads = Runtime.getRuntime().availableProcessors() + 1
/**
* Number of threads used for generating R plots
*/
@RuntimeParam
int r_threads = 2
/**
* Generate plots for each metric with R when doing grid optimization (ploop command) on 1 or 2 variables
*/
@RuntimeParam
boolean r_generate_plots = true
/**
* Generate standard deviation plot for each statistic when generating R plots
*/
@RuntimeParam
boolean r_plot_stddevs = false
/**
* Number of cross-validation folds to work on simultaneously.
* (Multiplies required memory)
*/
@RuntimeParam
int crossval_threads = 1
/**
* defines which atoms around the ligand are considered to be part of the pocket
* (ligands with longer distance are considered 'distant', i.e. irrelevant floating ligands)
*/
@ModelParam // training
double ligand_protein_contact_distance = 4
/**
* acceptable distance between ligand center and closest protein atom for relevant ligands
* (ligands with longer distance are considered 'distant', i.e. irrelevant floating ligands)
*/
@ModelParam // training
double ligc_prot_dist = 5.5
//==[ Features ]=========================================================================================================//
/**
* List of general calculated features
*/
@ModelParam
List<String> features = ["chem", "protrusion", "bfactor", "atom_table", "residue_table"]
/**
* List that is added to the features list at runtime.
* Useful in grid optimization mode for comparing different new features.
*/
@ModelParam
List<String> extra_features = []
/**
* List of features that come directly from atom type tables
* see atomic-properties.csv
*/
@ModelParam
List<String> atom_table_features = ["apRawValids","apRawInvalids","atomicHydrophobicity"]
/**
* List of features that come directly from residue tables
*/
@ModelParam
List<String> residue_table_features = []
/**
* List of feature filters that are applied to individual features (i.e. sub-features).
* If empty all individual features are used.
* Filters are applied sequentially.
*
* Examples of individual filters:
* <ul>
* <li> "*" - include all
* <li> "chem.*" - include all with prefix "chem."
* <li> "-chem.*" - exclude all with prefix "chem."
* <li> "chem.hydrophobicity" - include particular sub-feature
* <li> "-chem.hydrophobicity" - exclude particular sub-feature
* </ul>
*
* If the first filter in feature_filters starts with "-", include-all filter ("*") is implicitly applied to the front.
*
* Examples of full feature_filters values:
* <ul>
* <li> [] - include all
* <li> ["*"] - include all
* <li> ["*","-chem.*"] - include all except those with prefix "chem."
* <li> ["-chem.*"] - include all except those with prefix "chem."
* <li> ["-chem.*","chem.hydrophobicity"] - include all except those with prefix "chem.", but include "chem.hydrophobicity"
* <li> ["chem.hydrophobicity"] - include only "chem.hydrophobicity"
* <li> ["chem.*","-chem.hydrophobicity","-chem.atoms"] - include only those with prefix "chem.", except "chem.hydrophobicity" and "chem.atoms"
* </ul>
*/
@ModelParam
List<String> feature_filters = []
/**
* Exponent applied to all atom table features // TODO change default to 1
*/
@ModelParam
double atom_table_feat_pow = 2
/**
* Dummy param to preserve behaviour of older versions.
* Should be set to true for training new models.
*
* If true sign of value is reapplied after transformation by atom_table_feat_pow
*/
@ModelParam
boolean atom_table_feat_keep_sgn = false
/**
* radius for calculating the protrusion feature
*/
@ModelParam
double protrusion_radius = 10
//===========================================================================================================//
/**
* Number of bins for protr_hist feature, must be >=2
*/
@ModelParam
int protr_hist_bins = 5
/**
* Param of protr_hist feature
*/
@ModelParam
boolean protr_hist_cumulative = false
/**
* Param of protr_hist feature
*/
@ModelParam
boolean protr_hist_relative = false
/**
* Number of bins for Atom Pair distance histogram (pair_hist) feature, must be >=2
*/
@ModelParam
int pair_hist_bins = 5
/**
* Radius capturing atoms considered in pair_hist feature
*/
@ModelParam
double pair_hist_radius = 6
/**
* smooth vs. sharp binning
*/
@ModelParam
boolean pair_hist_smooth = false
/**
* apply normalization to histogram
*/
@ModelParam
boolean pair_hist_normalize = false
/**
* if false only protein exposed atoms are considered
*/
@ModelParam
boolean pair_hist_deep = true
/**
* size of random subsample of atom pairs, 0 = all
*/
@ModelParam
int pair_hist_subsample_limit = 0
//===========================================================================================================//
/**
* Load sequence conservation data
*/
@Deprecated
@RuntimeParam
boolean load_conservation = false
/**
* Pocket scoring algorithm
*/
@ModelParam
String score_pockets_by = "p2rank" // possible values: "p2rank", "conservation", "combi"
/**
* Conservation exponent for re-scoring pockets
*/
@ModelParam
int conservation_exponent = 1
/**
* Radius for calculating conservation cloud related features
*/
@ModelParam
double conserv_cloud_radius = 10
/**
* Radius for calculating secondary structure cloud related features
*/
@ModelParam
double ss_cloud_radius = 10
/**
* Directories in which to look for conservation score files.
* Path is absolute or relative to the dataset directory.
* If null or empty: look in the same directory as protein file
*/
@RuntimeParam
List<String> conservation_dirs = []
@RuntimeParam
List<String> electrostatics_dirs = []
/**
* Log scores for binding and nonbinding scores to file
*/
@RuntimeParam
String log_scores_to_file = ""
/**
* limits how many pocket SAS points are used for scoring (after sorting), 0=unlimited
* affects scoring pockets and also residues
*/
@ModelParam
int score_point_limit = 0
//==[ Classifiers ]=========================================================================================================//
/**
* see ClassifierOption
*/
@ModelParam
String classifier = "FastRandomForest"
/**
* see ClassifierOption
*/
@ModelParam
String inner_classifier = "FastRandomForest"
/**
* see ClassifierOption
*/
@ModelParam
int meta_classifier_iterations = 5
/**
* works only with classifier "CostSensitive_RF"
*/
@ModelParam // training
double false_positive_cost = 2
//=== Random Forests =================
/**
* RandomForest trees
*/
@ModelParam // training
int rf_trees = 100
/**
* RandomForest depth limit, 0=unlimited
*/
@ModelParam // training
int rf_depth = 0
/**
* RandomForest feature subset size for one tree, 0=default(sqrt)
*/
@ModelParam // training
int rf_features = 0
/**
* number of threads used in RandomForest training (0=use value of threads param)
*/
@RuntimeParam // training
int rf_threads = 0
/**
* size of a bag: 1..100% of the dataset
*/
@ModelParam // training
int rf_bagsize = 100
/**
* Flatten random forest after loading if possible
*/
@RuntimeParam
@ModelParam // training
boolean rf_flatten = false
/**
* Flatten random forest in a way that has exactly the same output
* by preserving weird way tree results are aggregated in FastRandomForest.
*/
@RuntimeParam
@ModelParam // training
boolean rf_flatten_as_legacy = true
/**
* try predict in batches if possible
*/
@RuntimeParam
boolean rf_batch_prediction = true
/**
* Fix bug in RF libraries where class probabilities on leaves were not properly normalized.
* Valid for FasterForest and FasterForest2. FastRandomForest has the bug (but not the fix).
*/
@ModelParam // training
boolean rf_ensure_leaves_normalized = false
/**
* cutoff for joining ligand atom groups into one ligand
*/
@ModelParam // training
double ligand_clustering_distance = 1.7 // ~= covalent bond length
/**
* cutoff around ligand that defines positives
*/
@ModelParam
double positive_point_ligand_distance = 2.5
/**
* distance around ligand atoms that define ligand induced volume
* (for evaluation by some criteria, DSO, ligand coverage...)
*/
@ModelParam
double ligand_induced_volume_cutoff = 2.5
/**
* points between (positive_point_ligand_distance, positive_point_ligand_distance + neutral_points_margin) will not be considered positives or negatives and will be left out from training
*/
@ModelParam // training
double neutral_points_margin = 5.5
/**
* Neighbourhood radius (A) used for calculating most of the features.
*/
@ModelParam
double neighbourhood_radius = 8
/**
* HETATM groups that are ignored (not marked as relevant ligands, e.g because they are cofactors or part of a substrate)
*/
@ModelParam // training
List<String> ignore_het_groups = ["HOH","DOD","WAT","NAG","MAN","UNK","GLC","ABA","MPD","GOL","SO4","PO4"]
/**
* Which ligand types define positive SAS points.
* accepted values: "relevant", "ignored", "small", "distant"
*/
@ModelParam // training
List<String> positive_def_ligtypes = ["relevant"]
/**
* Minimal heavy atom count for relevant ligands, other ligands are considered too small and ignored
*/
@ModelParam // training
int min_ligand_atoms = 5
/**
* Point sampler for extracting instances for training.
* P2Rank and PRANK use SurfacePointSampler that produces SAS points.
* Others like GridPointSampler are experimental, and also deprecated. see point_sampling_strategy
*/
@ModelParam
String point_sampler = "SurfacePointSampler"
/**
* surface | atoms | grid
*/
@Beta
String point_sampling_strategy = "surface"
/**
* multiplier for random point sampling
*/
@ModelParam // training
int sampling_multiplier = 3
/**
* solvent radius for SAS surface
*/
@ModelParam
double solvent_radius = 1.6
/**
* SAS tessellation (~density) used in prediction step.
* Higher tessellation = higher density (+1 ~~ x4 points)
*/
@ModelParam
int tessellation = 2
/**
* SAS tessellation (~density) used in training step
* 0 = use value of tessellation
*/
@ModelParam // training
int train_tessellation = 2
/**
* SAS tessellation (~density) used in training step to select negatives.
* Allows denser positive sampling than negative sampling and thus deal with class imbalance and train faster.
* 0 = use value of effective train_tessellation
*/
@ModelParam // training
int train_tessellation_negatives = 2
/**
* for grid and random sampling
*/
@ModelParam
double point_min_distfrom_protein = 2.5
/**
* for grid and random sampling
*/
@ModelParam
double point_max_distfrom_pocket = 4.5
/**
* grid cell size for grid sampling strategy (and old GridPointSampler)
*/
@ModelParam
double grid_cell_edge = 2
/**
* Cutoff radius around protein atoms. Grid points with higher distance to closest protein atom are discarded.
*/
@ModelParam
double grid_cutoff_radius = 3.4
/**
* Restrict training set size, 0=unlimited
*/
@RuntimeParam // training
int max_train_instances = 0
/**
* Param of SAS score weighting function (see WeightFun)
*/
@ModelParam
double weight_power = 2
/**
* Param of SAS score weighting function (see WeightFun)
*/
@ModelParam
double weight_sigma = 2.2
/**
* Param of SAS score weighting function (see WeightFun)
*/
@ModelParam
double weight_dist_param = 4.5
/**
* Choice of SAS score weighting function (see WeightFun)
*/
@ModelParam
String weight_function = "INV"
/**
* If false only single layer of proteins solvent exposed atoms is used for calculating features that are projected from protein atoms to SAS points
*/
@ModelParam
boolean deep_surrounding = false
/** calculate feature vectors from smooth atom feature representation
* (instead of directly from atom properties)
*/
@Deprecated
@ModelParam
boolean smooth_representation = false
/**
* related to smooth_representation
*/
@Deprecated
@ModelParam
double smoothing_radius = 4.5
/**
* determines how atom feature vectors are projected on to SAS point feature vector
* if true, atom feature vectors are averaged
* else they are only summed up
*/
@ModelParam
boolean average_feat_vectors = false
/**
* in feature projection from atoms to SAS points:
* only applicable when average_feat_vectors=true
* <0,1> goes from 'no average, just sum' -> 'full average'
*/
@ModelParam
double avg_pow = 1
/**
* regarding feature projection from atoms to SAS points: calculate weighted average
* (should be true by default, kept false for backward compatibility reasons)
*/
@ModelParam
boolean avg_weighted = false
/**
* exponent of point ligandability score (before adding it to pocket score)
*/
@ModelParam
double point_score_pow = 2
/**
* exponent of point ligandability score (before adding it to residue score in residue prediction mode)
* value less than 0 refers to the value of point_score_pow
*/
@ModelParam
double residue_point_score_pow = -1
/**
* Binary classifiers produce a histogram of scores for class0 and class1
* if true only score for class1 is considered
* makes a difference only if histogram produced by classifier doesn't sum up to 1
*/
@ModelParam
boolean use_only_positive_score = true
/**
* If true trained models will not be saved to disk (good for parameter optimization)
*/
@RuntimeParam
boolean delete_models = false
/**
* delete files containing training/evaluation feature vectors
*/
@RuntimeParam
boolean delete_vectors = true
/**
* check all loaded/calculated vectors for invalid (NaN) values
*/
@RuntimeParam
boolean check_vectors = false
/**
* collect vectors also from eval dataset (only makes sense in combination with delete_vectors=false)
*/
@RuntimeParam
boolean collect_eval_vectors = false
/**
* collect vectors only at the beginning of seed loop routine
* if dataset is sub-sampled (using train_protein_limit param) then dataset is sub-sampled only once
* set to false when calculating learning curve!
* train_protein_limit>0 should be always paired with collect_only_once=false
*/
@RuntimeParam
boolean collect_only_once = true
// /**
// * export vectors describing SAS points used during prediction
// * export is a table file containing: SAS point 3D coordinates, calculated features, predicted raw point ligandability score
// * see export_points_format
// */
// @RuntimeParam
// boolean export_points = false
// /**
// * format of the point export file
// *
// * relevant only if export_points=true
// *
// * Available options: "csv", "csv.gz"
// */
// @RuntimeParam
// boolean export_points_format = "csv"
/**
* number of random seed iterations
*
* Only relevant when training and evaluating new models.
* Result metrics are then averaged or calculated for sum of runs (where appropriate, like F1 measure).
* Example: running traineval with loop=10 will do ten runs with different random seeds and calculate averages.
*/
@RuntimeParam
int loop = 1
/**
* keep datasets (structures and SAS points) in memory between crossval/seedloop iterations
*/
@RuntimeParam
boolean cache_datasets = false
/**
* calculate feature importances
* available only for some classifiers
*/
@RuntimeParam
boolean feature_importances = false
/**
* produce visualisations
*/
@RuntimeParam
boolean visualizations = true
/**
* Renderers used to produce visualizations. Available renderers: [pymol, chimerax]
*/
@RuntimeParam
List<String> vis_renderers = ["pymol", "chimerax"]
/**
* visualize all surface points (not just inner pocket points)
*/
@RuntimeParam
boolean vis_all_surface = false
/**
* copy all protein pdb files to visualization folder (making visualizations portable)
*/
@RuntimeParam
boolean vis_copy_proteins = true
/**
* generate new protein pdb files from structures in memory instead of reusing input files
* (useful when structures were manipulated in memory, e.g. when reducing to specified chains)
*/
@RuntimeParam
boolean vis_generate_proteins = true
/**
* Highlight ligands by rendering them as enlarged balls (instead of sticks).
* Necessary to see 1 atom ligands like ions.
* Affects rendering only in pocket mode.
*/
@RuntimeParam
boolean vis_highlight_ligands = false
/**
* zip PyMol visualizations to save space
*/
@Deprecated
@RuntimeParam
boolean zip_visualizations = false
/**
* use strictly inner pocket points or a wider pocket neighbourhood
*/
@RuntimeParam
boolean strict_inner_points = false
/**
* cross-validation folds
*/
@RuntimeParam
int folds = 5
/**
* collect evaluations for top [n+0, n+1,...] pockets (n is true pocket count)
*/
@RuntimeParam
List<Integer> eval_tolerances = [0,1,2,4,10,99]
/**
* Calculate pocket predictions.
* This is a main switch between re-scoring of predictions by other methods (PRANK) and pocket prediction (P2Rank)
*/
@RuntimeParam
boolean predictions = true
/**
* Residue prediction mode (as opposed to full pocket prediction mode)
*/
@RuntimeParam
boolean predict_residues = false
/**
* If true, assign class to SAS points in training dataset based on proximity to the ligand.
* If false, assign class based on the class of the nearest residue.
* Distinction only makes sense when running in residue prediction mode (predict_residues = true).
*/
@RuntimeParam
boolean ligand_derived_point_labeling = true
/**
* produce residue labeling file (in predict mode)
*
* Even in full pocket prediction mode (predict_residues=false) we can label and score residues using transformers.
*/
@RuntimeParam
boolean label_residues = true
/**
* residue score threshold for calculating predicted binary label
*/
@ModelParam
double residue_score_threshold = 1d
/**
* in calculation of residue score from neighboring SAS points:
* <0,1> goes from 'no average, just sum' -> 'full average'
*/
@ModelParam
double residue_score_sum_to_avg = 0d
/**
* added to the cutoff distance around residue in score aggregation from SAS points
* full distance cutoff R around residue atoms is calculated as follows:
* R = solvent_radius + surface_additional_cutoff + residue_score_extra_dist
*/
@ModelParam
double residue_score_extra_dist = 0d
/**
* Calculate residue scores only for exposed residues (inner will have score 0)
* => only exposed residues can be predicted as positive.
* Makes sense only in combination with point_sampling_strategy=surface.
*/
@ModelParam
boolean residue_score_only_exposed = false
/**
* residue score transform function
*
* NONE: identity .. score will be in range <0,inf)
* SIGMOID: score will be transformed to range <0,1)
*/
@ModelParam
String residue_score_transform = "NONE"
/**
* minimum ligandability score for SAS point to be considered ligandable
*/
@ModelParam
double pred_point_threshold = 0.4
/**
* minimum cluster size (of ligandable points) for initial clustering
*/
@ModelParam
int pred_min_cluster_size = 3
/**
* clustering distance for ligandable clusters for second phase clustering
*/
@ModelParam
double pred_clustering_dist = 5
/**
* SAS points around ligandable points (and their score) will be included in the pocket
*/
@ModelParam
double extended_pocket_cutoff = 3.5
/**
* cutoff distance of protein surface atoms considered as part of the pocket
*/
@ModelParam
double pred_protein_surface_cutoff = 3.5
/**
* Prefix output directory with date and time
*/
@RuntimeParam
boolean out_prefix_date = false
/**
* Place all output files in this sub-directory of the output directory
*/
@RuntimeParam
String out_subdir = null
/**
* Balance SAS point score weight by density (points in denser areas will have lower weight)
*/
@ModelParam
boolean balance_density = false
/**
* Radius for balancing of SAS point score weight
*/
@ModelParam
double balance_density_radius = 2
/**
* output detailed tables for all proteins, ligands and pockets or residues
*/
@RuntimeParam
boolean log_cases = true
/**
* cutoff for protein exposed atoms calculation (distance from SAS surface is solv.radius. + surf_cutoff)
*/
@ModelParam
double surface_additional_cutoff = 1.8
/**
* collect negatives just from decoy pockets found by other method
* (alternatively take negative points from all of the protein's surface)
*/
@ModelParam // training
boolean sample_negatives_from_decoys = false
/**
* cutoff around ligand atoms to select negatives, 0=all
* valid if training from whole surface (sample_negatives_from_decoys=false)
*/
@ModelParam // training
double train_lig_cutoff = 0
/**
* n, use only top-n pockets to select training instances, 0=all
*/
@ModelParam // training
int train_pockets = 0
/**
* clear primary caches (protein structures) between runs (when iterating params or seed)
*/
@RuntimeParam // training
boolean clear_prim_caches = false
/**
* clear secondary caches (protein surfaces etc.) between runs (when iterating params or seed)
*/
@RuntimeParam // training
boolean clear_sec_caches = false
/**
* Select pocket re-scoring algorithm when running in re-scoring mode (predictions=false).
*
* Published PRANK (2015) = "ModelBasedRescorer"
*/
@ModelParam
String rescorer = "ModelBasedRescorer"
/**
* Parameter of the PLBIndexRescorer algorithm.
*/
@ModelParam
boolean plb_rescorer_atomic = false
/**
* stop processing the dataset on the first unrecoverable error with a dataset item
*/
@RuntimeParam
boolean fail_fast = false
/**
* Fail when (X-masked) sequences in the structure and in the conservation score file do not match exactly.
* Has effect only when fail_fast = true.
*/
@RuntimeParam
boolean fail_on_conserv_seq_mismatch = false
/**
* target class ratio of positives/negatives we train on.
* relates to subsampling and supersampling
*/
@RuntimeParam // training
double target_class_ratio = 0.1
/**
* in training use subsampling to deal with class imbalance
*/
@RuntimeParam // training
boolean subsample = false
/**
* in training use supersampling to deal with class imbalance
*/
@RuntimeParam // training
boolean supersample = false
/**
* sort negatives desc by protrusion before subsampling
*/
@RuntimeParam // training
boolean subsampl_high_protrusion_negatives = false
/**
* don't produce prediction files for individual proteins (useful for long repetitive experiments)
*/
@RuntimeParam
boolean output_only_stats = false
/**
* compress results of individual ploop runs
*/
@RuntimeParam
boolean ploop_zip_runs = false
/**
* delete results of individual ploop/hopt runs
*/
@RuntimeParam
boolean ploop_delete_runs = false
/**
* logging level (TRACE/DEBUG/INFO/WARN/ERROR)
*/
@RuntimeParam
String log_level = "INFO"
/**
* print log messages to console
*/
@RuntimeParam
boolean log_to_console = true
/**
* print log messages to file (run.log in outdir)
*/