From d35f6f70767a1af00722c973d0d50bebfaf07712 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 12 May 2024 18:25:10 +0200 Subject: [PATCH 1/5] Fix compiler warnings [-Wzero-as-null-pointer-constant] Signed-off-by: Stefan Weil --- src/api/pagerenderer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/pagerenderer.cpp b/src/api/pagerenderer.cpp index d2bee7d584..a6541ba91d 100644 --- a/src/api/pagerenderer.cpp +++ b/src/api/pagerenderer.cpp @@ -391,7 +391,7 @@ Pta *SortBaseline(Pta *baseline_pts, Pta *sorted_baseline_pts; sorted_baseline_pts = - ptaSort(baseline_pts, L_SORT_BY_X, L_SORT_INCREASING, NULL); + ptaSort(baseline_pts, L_SORT_BY_X, L_SORT_INCREASING, nullptr); do { ptaGetPt(sorted_baseline_pts, index, &x0, &y0); @@ -557,8 +557,8 @@ Pta *FitBaselineIntoLinePolygon(Pta *bottom_pts, Pta *baseline_pts, // Calculate quartiles to find outliers numaGetMedian(poly_bl_delta, &delta_median); - numaGetRankValue(poly_bl_delta, 0.25, NULL, 0, &delta_median_Q1); - numaGetRankValue(poly_bl_delta, 0.75, NULL, 0, &delta_median_Q3); + numaGetRankValue(poly_bl_delta, 0.25, nullptr, 0, &delta_median_Q1); + numaGetRankValue(poly_bl_delta, 0.75, nullptr, 0, &delta_median_Q3); // Fit baseline into the polygon // Todo: Needs maybe some adjustments to suppress fitting to superscript From e38bc38ef97a0c1dcd05473dc251fee3c5444ad2 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 12 May 2024 18:31:00 +0200 Subject: [PATCH 2/5] Fix compiler warnings [-Wunreachable-code-return] Signed-off-by: Stefan Weil --- src/training/common/networkbuilder.cpp | 1 - src/wordrec/chopper.cpp | 2 +- unittest/applybox_test.cc | 1 - unittest/baseapi_test.cc | 5 ----- 4 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/training/common/networkbuilder.cpp b/src/training/common/networkbuilder.cpp index 5c381ca9ad..5a0d91715c 100644 --- a/src/training/common/networkbuilder.cpp +++ b/src/training/common/networkbuilder.cpp @@ -114,7 +114,6 @@ Network 
*NetworkBuilder::BuildFromString(const StaticShape &input_shape, const c return ParseOutput(input_shape, str); default: tprintf("Invalid network spec:%s\n", *str); - return nullptr; } return nullptr; } diff --git a/src/wordrec/chopper.cpp b/src/wordrec/chopper.cpp index 195af68ef6..09a2333e11 100644 --- a/src/wordrec/chopper.cpp +++ b/src/wordrec/chopper.cpp @@ -344,7 +344,7 @@ SEAM *Wordrec::improve_one_blob(const std::vector &blob_choices, // TODO(rays) it may eventually help to allow italic_blob to be true, seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array); if (seam != nullptr) { - return seam; // Success! + break; // Success! } if (blob_choices[*blob_number] == nullptr) { return nullptr; diff --git a/unittest/applybox_test.cc b/unittest/applybox_test.cc index cf7456679e..80d80470d3 100644 --- a/unittest/applybox_test.cc +++ b/unittest/applybox_test.cc @@ -64,7 +64,6 @@ class ApplyBoxTest : public testing::Test { if (!SetImage(imagefile)) { // eng.traineddata not found or other problem during Init. GTEST_SKIP(); - return; } if (line_mode) { api_.SetVariable("tessedit_resegment_from_line_boxes", "1"); diff --git a/unittest/baseapi_test.cc b/unittest/baseapi_test.cc index 7c70b13dd5..4808fb3bad 100644 --- a/unittest/baseapi_test.cc +++ b/unittest/baseapi_test.cc @@ -124,7 +124,6 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) { if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) { // eng.traineddata not found. GTEST_SKIP(); - return; } Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); CHECK(src_pix); @@ -143,7 +142,6 @@ TEST_F(TesseractTest, HOCRContainsBaseline) { if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) { // eng.traineddata not found. 
GTEST_SKIP(); - return; } Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); CHECK(src_pix); @@ -179,7 +177,6 @@ TEST_F(TesseractTest, AdaptToWordStrTest) { if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) { // eng.traineddata not found. GTEST_SKIP(); - return; } api.SetVariable("matcher_sufficient_examples_for_prototyping", "1"); api.SetVariable("classify_class_pruner_threshold", "220"); @@ -215,7 +212,6 @@ TEST_F(TesseractTest, BasicLSTMTest) { if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) { // eng.traineddata not found. GTEST_SKIP(); - return; } Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str()); CHECK(src_pix); @@ -239,7 +235,6 @@ TEST_F(TesseractTest, LSTMGeometryTest) { if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) { // eng.traineddata not found. GTEST_SKIP(); - return; } api.SetImage(src_pix); ASSERT_EQ(api.Recognize(nullptr), 0); From 4e01d58cb61471535e14c8e52d7c0ffb607d6737 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 12 May 2024 18:46:42 +0200 Subject: [PATCH 3/5] Fix compiler warnings [-Wold-style-cast] Signed-off-by: Stefan Weil --- src/ccstruct/points.h | 4 ++-- src/dict/dawg.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ccstruct/points.h b/src/ccstruct/points.h index 418031f227..59793592a1 100644 --- a/src/ccstruct/points.h +++ b/src/ccstruct/points.h @@ -77,7 +77,7 @@ class ICOORD { /// find sq length float sqlength() const { - return (float)(xcoord * xcoord + ycoord * ycoord); + return static_cast<float>(xcoord * xcoord + ycoord * ycoord); } /// find length @@ -101,7 +101,7 @@ class ICOORD { /// find angle float angle() const { - return (float)std::atan2(ycoord, xcoord); + return std::atan2(static_cast<float>(ycoord), static_cast<float>(xcoord)); } /// test equality diff --git a/src/dict/dawg.h b/src/dict/dawg.h index f3176bdf90..408fa2ca96 100644 --- a/src/dict/dawg.h +++ 
b/src/dict/dawg.h @@ -34,10 +34,10 @@ #ifndef __GNUC__ # ifdef _WIN32 -# define NO_EDGE (int64_t)0xffffffffffffffffi64 +# define NO_EDGE static_cast<int64_t>(0xffffffffffffffffi64) # endif /*_WIN32*/ #else -# define NO_EDGE (int64_t)0xffffffffffffffffll +# define NO_EDGE static_cast<int64_t>(0xffffffffffffffffll) #endif /*__GNUC__*/ namespace tesseract { @@ -74,12 +74,12 @@ enum DawgType { C o n s t a n t s ----------------------------------------------------------------------*/ -#define FORWARD_EDGE (int32_t)0 -#define BACKWARD_EDGE (int32_t)1 -#define MAX_NODE_EDGES_DISPLAY (int64_t)100 -#define MARKER_FLAG (int64_t)1 -#define DIRECTION_FLAG (int64_t)2 -#define WERD_END_FLAG (int64_t)4 +#define FORWARD_EDGE static_cast<int32_t>(0) +#define BACKWARD_EDGE static_cast<int32_t>(1) +#define MAX_NODE_EDGES_DISPLAY static_cast<int64_t>(100) +#define MARKER_FLAG static_cast<int64_t>(1) +#define DIRECTION_FLAG static_cast<int64_t>(2) +#define WERD_END_FLAG static_cast<int64_t>(4) #define LETTER_START_BIT 0 #define NUM_FLAG_BITS 3 #define REFFORMAT "%" PRId64 From 1f1ee6b7708d2d4f7de0266ece5a9659cecdce4f Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 19 May 2024 18:10:38 +0200 Subject: [PATCH 4/5] Fix compiler warnings [-Wunused-but-set-variable] - Use conditional compilation for variables which might be useful for debugging. - Remove all other unused variables. 
Signed-off-by: Stefan Weil --- src/api/baseapi.cpp | 6 ++++++ src/ccmain/osdetect.cpp | 4 ++++ src/classify/normmatch.cpp | 3 --- src/textord/topitch.cpp | 6 ------ src/training/cntraining.cpp | 4 ++++ src/training/common/errorcounter.cpp | 4 ++++ src/training/common/sampleiterator.cpp | 2 -- 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 3a134980b0..06f8331138 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1402,7 +1402,9 @@ char *TessBaseAPI::GetTSVText(int page_number) { return nullptr; } +#if !defined(NDEBUG) int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; +#endif int page_id = page_number + 1; // we use 1-based page numbers. int page_num = page_id; @@ -1484,6 +1486,7 @@ char *TessBaseAPI::GetTSVText(int page_number) { tsv_str += "\t" + std::to_string(res_it->Confidence(RIL_WORD)); tsv_str += "\t"; +#if !defined(NDEBUG) // Increment counts if at end of block/paragraph/textline. if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) { lcnt++; @@ -1494,13 +1497,16 @@ char *TessBaseAPI::GetTSVText(int page_number) { if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) { bcnt++; } +#endif do { tsv_str += std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); tsv_str += "\n"; // end of row +#if !defined(NDEBUG) wcnt++; +#endif } char *ret = new char[tsv_str.length() + 1]; diff --git a/src/ccmain/osdetect.cpp b/src/ccmain/osdetect.cpp index dcc1aa5297..64a8bd6375 100644 --- a/src/ccmain/osdetect.cpp +++ b/src/ccmain/osdetect.cpp @@ -223,7 +223,9 @@ int orientation_and_script_detection(const char *filename, OSResults *osr, // Returns a non-zero number of blobs if the page was successfully processed, or // zero if the page had too few characters to be reliable int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess) { +#if !defined(NDEBUG) int blobs_total = 0; +#endif 
TO_BLOCK_IT block_it; block_it.set_to_list(port_blocks); @@ -241,7 +243,9 @@ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract * BLOBNBOX *bbox = bbox_it.data(); C_BLOB *blob = bbox->cblob(); TBOX box = blob->bounding_box(); +#if !defined(NDEBUG) ++blobs_total; +#endif // Catch illegal value of box width and avoid division by zero. if (box.width() == 0) { diff --git a/src/classify/normmatch.cpp b/src/classify/normmatch.cpp index ea79ad6398..7b132f2724 100644 --- a/src/classify/normmatch.cpp +++ b/src/classify/normmatch.cpp @@ -112,7 +112,6 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); } - int ProtoId = 0; iterate(Protos) { auto Proto = reinterpret_cast(Protos->first_node()); float Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; @@ -145,8 +144,6 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature if (Match < BestMatch) { BestMatch = Match; } - - ProtoId++; } return 1.0 - NormEvidenceOf(BestMatch); } /* ComputeNormMatch */ diff --git a/src/textord/topitch.cpp b/src/textord/topitch.cpp index 15fe23b8c5..ece82db682 100644 --- a/src/textord/topitch.cpp +++ b/src/textord/topitch.cpp @@ -151,7 +151,6 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix int like_votes; // votes over page int other_votes; // votes of unlike blocks int block_index; // number of block - int row_index; // number of row int maxwidth; // max pitch TO_BLOCK_IT block_it = blocks; // block iterator TO_BLOCK *block; // current block @@ -172,7 +171,6 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix if (pb != nullptr && !pb->IsText()) { continue; // Non text doesn't exist! 
} - row_index = 1; TO_ROW_IT row_it(block->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); @@ -226,7 +224,6 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix other_votes--; } } - row_index++; } block_index++; } @@ -518,7 +515,6 @@ bool try_rows_fixed( // find line stats bool testing_on // correct orientation ) { TO_ROW *row; // current row - int32_t row_index; // row number. int32_t def_fixed = 0; // counters int32_t def_prop = 0; int32_t maybe_fixed = 0; @@ -529,7 +525,6 @@ bool try_rows_fixed( // find line stats float lower, upper; // cluster thresholds TO_ROW_IT row_it = block->get_rows(); - row_index = 1; for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); ASSERT_HOST(row->xheight > 0); @@ -541,7 +536,6 @@ bool try_rows_fixed( // find line stats row->kern_size = lower; } } - row_index++; } count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop, dunno); diff --git a/src/training/cntraining.cpp b/src/training/cntraining.cpp index fdcf047a48..6116ba87f1 100644 --- a/src/training/cntraining.cpp +++ b/src/training/cntraining.cpp @@ -116,7 +116,9 @@ int main(int argc, char *argv[]) { InitFeatureDefs(&FeatureDefs); ParseArguments(&argc, &argv); +#if !defined(NDEBUG) int num_fonts = 0; +#endif for (const char *PageName = *++argv; PageName != nullptr; PageName = *++argv) { printf("Reading %s ...\n", PageName); FILE *TrainingPage = fopen(PageName, "rb"); @@ -124,7 +126,9 @@ int main(int argc, char *argv[]) { if (TrainingPage) { ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE, 100, nullptr, TrainingPage, &CharList); fclose(TrainingPage); +#if !defined(NDEBUG) ++num_fonts; +#endif } } printf("Clustering ...\n"); diff --git a/src/training/common/errorcounter.cpp b/src/training/common/errorcounter.cpp index 0adf5d8067..3d5a5bbb4f 100644 --- a/src/training/common/errorcounter.cpp +++ b/src/training/common/errorcounter.cpp @@ -114,7 
+114,9 @@ void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifi ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize); std::vector results; +#if !defined(NDEBUG) int total_samples = 0; +#endif int error_samples = 25; int total_new_errors = 0; // Iterate over all the samples, accumulating errors. @@ -145,7 +147,9 @@ void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifi } } } +#if !defined(NDEBUG) ++total_samples; +#endif } tprintf("Total new errors = %d\n", total_new_errors); } diff --git a/src/training/common/sampleiterator.cpp b/src/training/common/sampleiterator.cpp index f7de208987..fc56946044 100644 --- a/src/training/common/sampleiterator.cpp +++ b/src/training/common/sampleiterator.cpp @@ -240,11 +240,9 @@ int SampleIterator::UniformSamples() { // to 1. Returns the minimum assigned sample weight. double SampleIterator::NormalizeSamples() { double total_weight = 0.0; - int sample_count = 0; for (Begin(); !AtEnd(); Next()) { const TrainingSample &sample = GetSample(); total_weight += sample.weight(); - ++sample_count; } // Normalize samples. double min_assigned_sample_weight = 1.0; From 48d78b7af29577f3cd5b4f3e669169d2c9eebfb2 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 19 May 2024 18:27:38 +0200 Subject: [PATCH 5/5] Avoid some float / double conversions in class Classify This also fixes several compiler warnings ([-Wimplicit-float-conversion], [-Wdouble-promotion]). Signed-off-by: Stefan Weil --- src/classify/normmatch.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/classify/normmatch.cpp b/src/classify/normmatch.cpp index 7b132f2724..6ea75b9962 100644 --- a/src/classify/normmatch.cpp +++ b/src/classify/normmatch.cpp @@ -52,17 +52,17 @@ struct NORM_PROTOS { * normalization adjustment. 
The equation that represents the transform is: * 1 / (1 + (NormAdj / midpoint) ^ curl) */ -static double NormEvidenceOf(double NormAdj) { - NormAdj /= classify_norm_adj_midpoint; +static float NormEvidenceOf(float NormAdj) { + NormAdj /= static_cast<float>(classify_norm_adj_midpoint); if (classify_norm_adj_curl == 3) { NormAdj = NormAdj * NormAdj * NormAdj; } else if (classify_norm_adj_curl == 2) { NormAdj = NormAdj * NormAdj; } else { - NormAdj = pow(NormAdj, classify_norm_adj_curl); + NormAdj = std::pow(NormAdj, static_cast<float>(classify_norm_adj_curl)); } - return (1.0 / (1.0 + NormAdj)); + return (1 / (1 + NormAdj)); } /*---------------------------------------------------------------------------- @@ -73,7 +73,7 @@ static double NormEvidenceOf(double NormAdj) { double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); /** Weight of width variance against height and vertical position. */ -const double kWidthErrorWeighting = 0.125; +const float kWidthErrorWeighting = 0.125f; /*---------------------------------------------------------------------------- Public Code @@ -102,7 +102,7 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature float Match = (feature.Params[CharNormLength] * feature.Params[CharNormLength] * 500.0f + feature.Params[CharNormRx] * feature.Params[CharNormRx] * 8000.0f + feature.Params[CharNormRy] * feature.Params[CharNormRy] * 8000.0f); - return (1.0f - NormEvidenceOf(Match)); + return (1 - NormEvidenceOf(Match)); } float BestMatch = FLT_MAX; @@ -145,7 +145,7 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature BestMatch = Match; } } - return 1.0 - NormEvidenceOf(BestMatch); + return 1 - NormEvidenceOf(BestMatch); } /* ComputeNormMatch */ void Classify::FreeNormProtos() {