From 2d668a3cfcf1a8c276f8e2fd4204a53bdc23227a Mon Sep 17 00:00:00 2001 From: mbootsector Date: Tue, 6 Oct 2015 08:15:17 +0200 Subject: [PATCH 01/21] Lazy smp Start all threads searching on the root position and use only the shared transposition table (TT) as the synchronization scheme. It seems this scheme scales better than YBWC for a high number of threads. Tested at very LTC (120+0.1) with 23 threads ELO: 35.52 +-9.6 (95%) LOS: 100.0% Total: 1109 W: 183 L: 70 D: 856 Tested at LTC with 23 threads ELO: 34.41 +-9.9 (95%) LOS: 100.0% Total: 1094 W: 184 L: 76 D: 834 Tested at LTC with 7 threads ELO: 8.76 +-5.0 (95%) LOS: 100.0% Total: 5000 W: 735 L: 609 D: 3656 Tested at STC with 7 threads ELO: 16.76 +-5.4 (95%) LOS: 100.0% Total: 5000 W: 899 L: 658 D: 3443 Bench: 8397672 --- src/benchmark.cpp | 2 +- src/movepick.cpp | 7 - src/search.cpp | 751 +++++++++++++++++++--------------------------- src/search.h | 2 - src/thread.cpp | 161 +--------- src/thread.h | 73 +---- src/timeman.h | 3 +- 7 files changed, 330 insertions(+), 669 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 5c4c4e36236..c683a38170d 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -158,7 +158,7 @@ void benchmark(const Position& current, istream& is) { Search::StateStackPtr st; Threads.start_thinking(pos, limits, st); Threads.main()->join(); - nodes += Search::RootPos.nodes_searched(); + nodes += Threads.nodes_searched(); } } diff --git a/src/movepick.cpp b/src/movepick.cpp index 7cf3e607232..1f01aaafc95 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -320,10 +320,3 @@ Move MovePicker::next_move() { } } } - - -/// Version of next_move() to use at split point nodes where the move is grabbed -/// from the split point's shared MovePicker object. This function is not thread -/// safe so must be lock protected by the caller. 
-template<> -Move MovePicker::next_move() { return ss->splitPoint->movePicker->next_move(); } diff --git a/src/search.cpp b/src/search.cpp index 4988dc8e676..61796406c5e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -39,9 +39,8 @@ namespace Search { volatile SignalsType Signals; LimitsType Limits; - RootMoveVector RootMoves; - Position RootPos; StateStackPtr SetupStates; + CounterMovesHistoryStats CounterMovesHistory; } namespace Tablebases { @@ -107,7 +106,7 @@ namespace { assert(newPv.size() >= 3); - // Keep track of how many times in a row 3rd ply remains stable + // Keep track of how many times in a row 3rd ply remains stable stableCnt = (newPv[2] == pv[2]) ? stableCnt + 1 : 0; if (!std::equal(newPv.begin(), newPv.begin() + 3, pv)) @@ -128,21 +127,17 @@ namespace { Move pv[3]; }; - size_t PVIdx; EasyMoveManager EasyMove; double BestMoveChanges; Value DrawValue[COLOR_NB]; - HistoryStats History; - CounterMovesHistoryStats CounterMovesHistory; - MovesStats Countermoves; - template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth); - void id_loop(Position& pos); + void id_loop(); Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply); void update_pv(Move* pv, Move move, Move* childPv); @@ -185,9 +180,12 @@ void Search::init() { void Search::reset () { TT.clear(); - History.clear(); CounterMovesHistory.clear(); - Countermoves.clear(); + + for (Thread* th : Threads) { + th->History.clear(); + th->Countermoves.clear(); + } } @@ -227,8 +225,9 @@ template uint64_t Search::perft(Position& pos, Depth depth); void Search::think() { - Color us = RootPos.side_to_move(); - Time.init(Limits, us, RootPos.game_ply(), now()); + MainThread* mth = Threads.main(); // Shorthand + Color us = mth->pos.side_to_move(); + Time.init(Limits, us, mth->pos.game_ply(), now()); int contempt = Options["Contempt"] * PawnValueEg / 
100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); @@ -247,21 +246,21 @@ void Search::think() { TB::ProbeDepth = DEPTH_ZERO; } - if (RootMoves.empty()) + if (mth->rootMoves.empty()) { - RootMoves.push_back(RootMove(MOVE_NONE)); + mth->rootMoves.push_back(RootMove(MOVE_NONE)); sync_cout << "info depth 0 score " - << UCI::value(RootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << UCI::value(mth->pos.checkers() ? -VALUE_MATE : VALUE_DRAW) << sync_endl; } else { - if (TB::Cardinality >= RootPos.count(WHITE) - + RootPos.count(BLACK)) + if (TB::Cardinality >= mth->pos.count(WHITE) + + mth->pos.count(BLACK)) { // If the current root position is in the tablebases then RootMoves // contains only moves that preserve the draw or win. - TB::RootInTB = Tablebases::root_probe(RootPos, RootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe(mth->pos, mth->rootMoves, TB::Score); if (TB::RootInTB) TB::Cardinality = 0; // Do not probe tablebases during the search @@ -269,7 +268,7 @@ void Search::think() { else // If DTZ tables are missing, use WDL tables as a fallback { // Filter out moves that do not preserve a draw or win - TB::RootInTB = Tablebases::root_probe_wdl(RootPos, RootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe_wdl(mth->pos, mth->rootMoves, TB::Score); // Only probe during search if winning if (TB::Score <= VALUE_DRAW) @@ -278,7 +277,7 @@ void Search::think() { if (TB::RootInTB) { - TB::Hits = RootMoves.size(); + TB::Hits = mth->rootMoves.size(); if (!TB::UseRule50) TB::Score = TB::Score > VALUE_DRAW ? VALUE_MATE - MAX_PLY - 1 @@ -287,16 +286,23 @@ void Search::think() { } } + // Prepare the threads. 
for (Thread* th : Threads) { th->maxPly = 0; + th->rootDepth = DEPTH_ZERO; + if (th != mth) { + Position pos(mth->pos, th); + th->pos = pos; + th->rootMoves = mth->rootMoves; + } th->notify_one(); // Wake up all the threads } Threads.timer->run = true; Threads.timer->notify_one(); // Start the recurring timer - id_loop(RootPos); // Let's start searching ! + id_loop(); // Let's start searching ! Threads.timer->run = false; } @@ -304,7 +310,7 @@ void Search::think() { // When playing in 'nodes as time' mode, subtract the searched nodes from // the available ones before to exit. if (Limits.npmsec) - Time.availableNodes += Limits.inc[us] - RootPos.nodes_searched(); + Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); // When we reach the maximum depth, we can arrive here without a raise of // Signals.stop. However, if we are pondering or in an infinite search, @@ -314,194 +320,265 @@ void Search::think() { if (!Signals.stop && (Limits.ponder || Limits.infinite)) { Signals.stopOnPonderhit = true; - RootPos.this_thread()->wait_for(Signals.stop); + mth->pos.this_thread()->wait_for(Signals.stop); } - sync_cout << "bestmove " << UCI::move(RootMoves[0].pv[0], RootPos.is_chess960()); + sync_cout << "bestmove " << UCI::move(mth->rootMoves[0].pv[0], mth->pos.is_chess960()); - if (RootMoves[0].pv.size() > 1 || RootMoves[0].extract_ponder_from_tt(RootPos)) - std::cout << " ponder " << UCI::move(RootMoves[0].pv[1], RootPos.is_chess960()); + if (mth->rootMoves[0].pv.size() > 1 || mth->rootMoves[0].extract_ponder_from_tt(mth->pos)) + std::cout << " ponder " << UCI::move(mth->rootMoves[0].pv[1], mth->pos.is_chess960()); std::cout << sync_endl; + } -namespace { +// Thread::id_loop() is the main iterative deepening loop. It calls search() repeatedly +// with increasing depth until the allocated thinking time has been consumed, +// user stops the search, or the maximum search depth is reached. - // id_loop() is the main iterative deepening loop. 
It calls search() repeatedly - // with increasing depth until the allocated thinking time has been consumed, - // user stops the search, or the maximum search depth is reached. +void Thread::id_loop() { - void id_loop(Position& pos) { + Threads.main()->mutex.lock(); + Threads.main()->slavesMask.set(idx); + Threads.main()->mutex.unlock(); - Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2) - Depth depth; - Value bestValue, alpha, beta, delta; + Value bestValue, alpha, beta, delta; - Move easyMove = EasyMove.get(pos.key()); - EasyMove.clear(); + Move easyMove = EasyMove.get(pos.key()); + EasyMove.clear(); - std::memset(ss-2, 0, 5 * sizeof(Stack)); + Stack *ss = stack+2; // To allow referencing (ss-2) and (ss+2) + std::memset(stack, 0, 5 * sizeof(Stack)); - depth = DEPTH_ZERO; - BestMoveChanges = 0; - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; + bestValue = delta = alpha = -VALUE_INFINITE; + beta = VALUE_INFINITE; - TT.new_search(); + size_t multiPV = Options["MultiPV"]; + Skill skill(Options["Skill Level"]); - size_t multiPV = Options["MultiPV"]; - Skill skill(Options["Skill Level"]); + // When playing with strength handicap enable MultiPV search that we will + // use behind the scenes to retrieve a set of possible moves. + if (skill.enabled()) + multiPV = std::max(multiPV, (size_t)4); - // When playing with strength handicap enable MultiPV search that we will - // use behind the scenes to retrieve a set of possible moves. - if (skill.enabled()) - multiPV = std::max(multiPV, (size_t)4); + multiPV = std::min(multiPV, rootMoves.size()); - multiPV = std::min(multiPV, RootMoves.size()); + // Iterative deepening loop until requested to stop or target depth reached + while (true) + { + // Set up our new depth. + + // The main thread modifies other threads rootDepth, + // if it is <= main thread depth. The new depth will take effect + // after the other thread returns to id_loop(). 
+ if (this == Threads.main()) { + ++rootDepth; + for (Thread* th : Threads) { + if (th != this) { + if (th->rootDepth <= rootDepth) { + th->rootDepth = rootDepth + (Depth)(((th->idx - 1) / 2) + 1); + } + } + } + } + else { + // This can cause a thread to search with the same depth for many iterations. + rootDepth = Threads.main()->rootDepth + (Depth)(((this->idx - 1) / 2) + 1); + } - // Iterative deepening loop until requested to stop or target depth reached - while (++depth < DEPTH_MAX && !Signals.stop && (!Limits.depth || depth <= Limits.depth)) - { - // Age out PV variability metric - BestMoveChanges *= 0.5; + if (rootDepth >= DEPTH_MAX || Signals.stop || (Limits.depth && rootDepth > Limits.depth)) + break; - // Save the last iteration's scores before first PV line is searched and - // all the move scores except the (new) PV are set to -VALUE_INFINITE. - for (RootMove& rm : RootMoves) - rm.previousScore = rm.score; + // Age out PV variability metric + if (this == Threads.main()) + BestMoveChanges *= 0.5; - // MultiPV loop. We perform a full root search for each PV line - for (PVIdx = 0; PVIdx < multiPV && !Signals.stop; ++PVIdx) - { - // Reset aspiration window starting size - if (depth >= 5 * ONE_PLY) - { - delta = Value(16); - alpha = std::max(RootMoves[PVIdx].previousScore - delta,-VALUE_INFINITE); - beta = std::min(RootMoves[PVIdx].previousScore + delta, VALUE_INFINITE); - } + // Save the last iteration's scores before first PV line is searched and + // all the move scores except the (new) PV are set to -VALUE_INFINITE. + for (RootMove& rm : rootMoves) + rm.previousScore = rm.score; - // Start with a small aspiration window and, in the case of a fail - // high/low, re-search with a bigger window until we're not failing - // high/low anymore. - while (true) - { - bestValue = search(pos, ss, alpha, beta, depth, false); - - // Bring the best move to the front. 
It is critical that sorting - // is done with a stable algorithm because all the values but the - // first and eventually the new best one are set to -VALUE_INFINITE - // and we want to keep the same order for all the moves except the - // new PV that goes to the front. Note that in case of MultiPV - // search the already searched PV lines are preserved. - std::stable_sort(RootMoves.begin() + PVIdx, RootMoves.end()); - - // Write PV back to transposition table in case the relevant - // entries have been overwritten during the search. - for (size_t i = 0; i <= PVIdx; ++i) - RootMoves[i].insert_pv_in_tt(pos); - - // If search has been stopped break immediately. Sorting and - // writing PV back to TT is safe because RootMoves is still - // valid, although it refers to previous iteration. - if (Signals.stop) - break; + // MultiPV loop. We perform a full root search for each PV line + for (PVIdx = 0; PVIdx < multiPV && !Signals.stop; ++PVIdx) + { + // Reset aspiration window starting size + if (rootDepth >= 5 * ONE_PLY) + { + delta = Value(16); + alpha = std::max(rootMoves[PVIdx].previousScore - delta,-VALUE_INFINITE); + beta = std::min(rootMoves[PVIdx].previousScore + delta, VALUE_INFINITE); + } +/* + sync_cout << "*T" << idx << " - I'm starting iteration " << rootDepth << "."; + std::cout << " We are searching "; + for (Thread* th : Threads) + std::cout << th->rootDepth << " "; + std::cout << "." << sync_endl; +*/ + // Start with a small aspiration window and, in the case of a fail + // high/low, re-search with a bigger window until we're not failing + // high/low anymore. + while (true) + { + bestValue = search(pos, ss, alpha, beta, rootDepth, false); + + // Bring the best move to the front. It is critical that sorting + // is done with a stable algorithm because all the values but the + // first and eventually the new best one are set to -VALUE_INFINITE + // and we want to keep the same order for all the moves except the + // new PV that goes to the front. 
Note that in case of MultiPV + // search the already searched PV lines are preserved. + + std::stable_sort(rootMoves.begin() + PVIdx, rootMoves.end()); + + // Write PV back to transposition table in case the relevant + // entries have been overwritten during the search. + for (size_t i = 0; i <= PVIdx; ++i) + rootMoves[i].insert_pv_in_tt(pos); + + // If search has been stopped break immediately. Sorting and + // writing PV back to TT is safe because RootMoves is still + // valid, although it refers to previous iteration. + if (Signals.stop) + break; + if (this == Threads.main()) { // When failing high/low give some update (without cluttering // the UI) before a re-search. - if ( multiPV == 1 + if (multiPV == 1 && (bestValue <= alpha || bestValue >= beta) && Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; - - // In case of failing low/high increase aspiration window and - // re-search, otherwise exit the loop. - if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - Signals.failedLowAtRoot = true; - Signals.stopOnPonderhit = false; - } - else if (bestValue >= beta) - { - alpha = (alpha + beta) / 2; - beta = std::min(bestValue + delta, VALUE_INFINITE); - } - else - break; - - delta += delta / 2; - - assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); - } + sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + } - // Sort the PV lines searched so far and update the GUI - std::stable_sort(RootMoves.begin(), RootMoves.begin() + PVIdx + 1); + // In case of failing low/high increase aspiration window and + // re-search, otherwise exit the loop. 
+ if (bestValue <= alpha) + { + beta = (alpha + beta) / 2; + alpha = std::max(bestValue - delta, -VALUE_INFINITE); + if (this == Threads.main()) { + Signals.failedLowAtRoot = true; + Signals.stopOnPonderhit = false; + } + } + else if (bestValue >= beta) + { + alpha = (alpha + beta) / 2; + beta = std::min(bestValue + delta, VALUE_INFINITE); + } + else + break; - if (Signals.stop) - sync_cout << "info nodes " << RootPos.nodes_searched() - << " time " << Time.elapsed() << sync_endl; + delta += delta / 2; - else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; - } + assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); + } - // If skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(depth)) - skill.pick_best(multiPV); + // Sort the PV lines searched so far and update the GUI + std::stable_sort(rootMoves.begin(), rootMoves.begin() + PVIdx + 1); - // Have we found a "mate in x"? - if ( Limits.mate - && bestValue >= VALUE_MATE_IN_MAX_PLY - && VALUE_MATE - bestValue <= 2 * Limits.mate) - Signals.stop = true; + if (this != Threads.main()) + break; - // Do we have time for the next iteration? Can we stop searching now? - if (Limits.use_time_management()) - { - if (!Signals.stop && !Signals.stopOnPonderhit) - { - // Take some extra time if the best move has changed - if (depth > 4 * ONE_PLY && multiPV == 1) - Time.pv_instability(BestMoveChanges); - - // Stop the search if only one legal move is available or all - // of the available time has been used or we matched an easyMove - // from the previous search and just did a fast verification. - if ( RootMoves.size() == 1 - || Time.elapsed() > Time.available() - || ( RootMoves[0].pv[0] == easyMove - && BestMoveChanges < 0.03 - && Time.elapsed() > Time.available() / 10)) - { - // If we are allowed to ponder do not stop the search now but - // keep pondering until the GUI sends "ponderhit" or "stop". 
- if (Limits.ponder) - Signals.stopOnPonderhit = true; - else - Signals.stop = true; - } - } + if (Signals.stop) + sync_cout << "info nodes " << Threads.nodes_searched() + << " time " << Time.elapsed() << sync_endl; - if (RootMoves[0].pv.size() >= 3) - EasyMove.update(pos, RootMoves[0].pv); - else - EasyMove.clear(); - } - } + else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) + sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + } + + if (this != Threads.main()) + continue; + + // If skill level is enabled and time is up, pick a sub-optimal best move + if (skill.enabled() && skill.time_to_pick(rootDepth)) + skill.pick_best(multiPV); + + // Have we found a "mate in x"? + if ( Limits.mate + && bestValue >= VALUE_MATE_IN_MAX_PLY + && VALUE_MATE - bestValue <= 2 * Limits.mate) + Signals.stop = true; + + // Do we have time for the next iteration? Can we stop searching now? + if (Limits.use_time_management()) + { + if (!Signals.stop && !Signals.stopOnPonderhit) + { + // Take some extra time if the best move has changed + if (rootDepth > 4 * ONE_PLY && multiPV == 1) + Time.pv_instability(BestMoveChanges); + + // Stop the search if only one legal move is available or all + // of the available time has been used or we matched an easyMove + // from the previous search and just did a fast verification. + if ( rootMoves.size() == 1 + || Time.elapsed() > Time.available() + || ( rootMoves[0].pv[0] == easyMove + && BestMoveChanges < 0.03 + && Time.elapsed() > Time.available() / 10)) + { + // If we are allowed to ponder do not stop the search now but + // keep pondering until the GUI sends "ponderhit" or "stop". 
+ if (Limits.ponder) + Signals.stopOnPonderhit = true; + else + Signals.stop = true; + } + } + + if (rootMoves[0].pv.size() >= 3) + EasyMove.update(pos, rootMoves[0].pv); + else + EasyMove.clear(); + } + } + + searching = false; + + Threads.main()->mutex.lock(); + Threads.main()->slavesMask.reset(idx); + Threads.main()->mutex.unlock(); +} + + + +namespace { + + // id_loop() calls Thread::id_loop() + + void id_loop() { + + BestMoveChanges = 0; + + TT.new_search(); + + // Start search for the other threads. + for (Thread* th : Threads) + th->searching = true; + + Threads.main()->id_loop(); + + // Force a quicker exit of fixed depth searches. + Signals.stop = true; + + // Wait until all threads have finished. + while (Threads.main()->slavesMask != 0) {} // Clear any candidate easy move that wasn't stable for the last search // iterations; the second condition prevents consecutive fast moves. if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) EasyMove.clear(); + size_t multiPV = Options["MultiPV"]; + Skill skill(Options["Skill Level"]); // If skill level is enabled, swap best PV line with the sub-optimal one if (skill.enabled()) - std::swap(RootMoves[0], *std::find(RootMoves.begin(), - RootMoves.end(), skill.best_move(multiPV))); + std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), + Threads.main()->rootMoves.end(), skill.best_move(multiPV))); } @@ -512,7 +589,7 @@ namespace { // repeat all this work again. We also don't need to store anything to the hash // table here: This is taken care of after we return from the split point. 
- template + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { const bool RootNode = NT == Root; @@ -525,7 +602,6 @@ namespace { Move pv[MAX_PLY+1], quietsSearched[64]; StateInfo st; TTEntry* tte; - SplitPoint* splitPoint; Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth, predictedDepth; @@ -537,22 +613,6 @@ namespace { // Step 1. Initialize node Thread* thisThread = pos.this_thread(); inCheck = pos.checkers(); - - if (SpNode) - { - splitPoint = ss->splitPoint; - bestMove = splitPoint->bestMove; - bestValue = splitPoint->bestValue; - tte = nullptr; - ttHit = false; - ttMove = excludedMove = MOVE_NONE; - ttValue = VALUE_NONE; - - assert(splitPoint->bestValue > -VALUE_INFINITE && splitPoint->moveCount > 0); - - goto moves_loop; - } - moveCount = quietCount = ss->moveCount = 0; bestValue = -VALUE_INFINITE; ss->ply = (ss-1)->ply + 1; @@ -591,7 +651,7 @@ namespace { excludedMove = ss->excludedMove; posKey = excludedMove ? pos.exclusion_key() : pos.key(); tte = TT.probe(posKey, ttHit); - ss->ttMove = ttMove = RootNode ? RootMoves[PVIdx].pv[0] : ttHit ? tte->move() : MOVE_NONE; + ss->ttMove = ttMove = RootNode ? thisThread->rootMoves[thisThread->PVIdx].pv[0] : ttHit ? tte->move() : MOVE_NONE; ttValue = ttHit ? value_from_tt(tte->value(), ss->ply) : VALUE_NONE; // At non-PV nodes we check for a fail high/low. We don't prune at PV nodes @@ -710,7 +770,7 @@ namespace { pos.do_null_move(st); (ss+1)->skipEarlyPruning = true; nullValue = depth-R < ONE_PLY ? -qsearch(pos, ss+1, -beta, -beta+1, DEPTH_ZERO) - : - search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); + : - search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); (ss+1)->skipEarlyPruning = false; pos.undo_null_move(); @@ -726,7 +786,7 @@ namespace { // Do verification search at high depths ss->skipEarlyPruning = true; Value v = depth-R < ONE_PLY ? 
qsearch(pos, ss, beta-1, beta, DEPTH_ZERO) - : search(pos, ss, beta-1, beta, depth-R, false); + : search(pos, ss, beta-1, beta, depth-R, false); ss->skipEarlyPruning = false; if (v >= beta) @@ -749,7 +809,7 @@ namespace { assert((ss-1)->currentMove != MOVE_NONE); assert((ss-1)->currentMove != MOVE_NULL); - MovePicker mp(pos, ttMove, History, CounterMovesHistory, PieceValue[MG][pos.captured_piece_type()]); + MovePicker mp(pos, ttMove, thisThread->History, CounterMovesHistory, PieceValue[MG][pos.captured_piece_type()]); CheckInfo ci(pos); while ((move = mp.next_move()) != MOVE_NONE) @@ -757,7 +817,7 @@ namespace { { ss->currentMove = move; pos.do_move(move, st, pos.gives_check(move, ci)); - value = -search(pos, ss+1, -rbeta, -rbeta+1, rdepth, !cutNode); + value = -search(pos, ss+1, -rbeta, -rbeta+1, rdepth, !cutNode); pos.undo_move(move); if (value >= rbeta) return value; @@ -771,19 +831,19 @@ namespace { { Depth d = depth - 2 * ONE_PLY - (PvNode ? DEPTH_ZERO : depth / 4); ss->skipEarlyPruning = true; - search(pos, ss, alpha, beta, d, true); + search(pos, ss, alpha, beta, d, true); ss->skipEarlyPruning = false; tte = TT.probe(posKey, ttHit); ttMove = ttHit ? 
tte->move() : MOVE_NONE; } -moves_loop: // When in check and at SpNode search starts from here +moves_loop: // When in check search starts from here Square prevMoveSq = to_sq((ss-1)->currentMove); - Move countermove = Countermoves[pos.piece_on(prevMoveSq)][prevMoveSq]; + Move countermove = thisThread->Countermoves[pos.piece_on(prevMoveSq)][prevMoveSq]; - MovePicker mp(pos, ttMove, depth, History, CounterMovesHistory, countermove, ss); + MovePicker mp(pos, ttMove, depth, thisThread->History, CounterMovesHistory, countermove, ss); CheckInfo ci(pos); value = bestValue; // Workaround a bogus 'uninitialized' warning under gcc improving = ss->staticEval >= (ss-2)->staticEval @@ -791,7 +851,6 @@ namespace { ||(ss-2)->staticEval == VALUE_NONE; singularExtensionNode = !RootNode - && !SpNode && depth >= 8 * ONE_PLY && ttMove != MOVE_NONE /* && ttValue != VALUE_NONE Already implicit in the next condition */ @@ -802,7 +861,7 @@ namespace { // Step 11. Loop through moves // Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); @@ -812,29 +871,19 @@ namespace { // At root obey the "searchmoves" option and skip moves not listed in Root // Move List. As a consequence any illegal move is also skipped. In MultiPV // mode we also skip PV moves which have been already searched. 
- if (RootNode && !std::count(RootMoves.begin() + PVIdx, RootMoves.end(), move)) + if (RootNode && !std::count(thisThread->rootMoves.begin() + thisThread->PVIdx, thisThread->rootMoves.end(), move)) continue; - if (SpNode) - { - // Shared counter cannot be decremented later if the move turns out to be illegal - if (!pos.legal(move, ci.pinned)) - continue; + ss->moveCount = ++moveCount; - ss->moveCount = moveCount = ++splitPoint->moveCount; - splitPoint->spinlock.release(); - } - else - ss->moveCount = ++moveCount; - - if (RootNode) + if (RootNode && thisThread == Threads.main()) { Signals.firstRootMove = (moveCount == 1); - if (thisThread == Threads.main() && Time.elapsed() > 3000) + if (Time.elapsed() > 3000) sync_cout << "info depth " << depth / ONE_PLY << " currmove " << UCI::move(move, pos.is_chess960()) - << " currmovenumber " << moveCount + PVIdx << sync_endl; + << " currmovenumber " << moveCount + thisThread->PVIdx << sync_endl; } if (PvNode) @@ -864,7 +913,7 @@ namespace { Value rBeta = ttValue - 2 * depth / ONE_PLY; ss->excludedMove = move; ss->skipEarlyPruning = true; - value = search(pos, ss, rBeta - 1, rBeta, depth / 2, cutNode); + value = search(pos, ss, rBeta - 1, rBeta, depth / 2, cutNode); ss->skipEarlyPruning = false; ss->excludedMove = MOVE_NONE; @@ -887,9 +936,6 @@ namespace { if ( depth < 16 * ONE_PLY && moveCount >= FutilityMoveCounts[improving][depth]) { - if (SpNode) - splitPoint->spinlock.acquire(); - continue; } @@ -903,13 +949,6 @@ namespace { if (futilityValue <= alpha) { bestValue = std::max(bestValue, futilityValue); - - if (SpNode) - { - splitPoint->spinlock.acquire(); - if (bestValue > splitPoint->bestValue) - splitPoint->bestValue = bestValue; - } continue; } } @@ -917,9 +956,6 @@ namespace { // Prune moves with negative SEE at low depths if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO) { - if (SpNode) - splitPoint->spinlock.acquire(); - continue; } } @@ -928,7 +964,7 @@ namespace { 
prefetch(TT.first_entry(pos.key_after(move))); // Check for legality just before making the move - if (!RootNode && !SpNode && !pos.legal(move, ci.pinned)) + if (!RootNode && !pos.legal(move, ci.pinned)) { ss->moveCount = --moveCount; continue; @@ -950,12 +986,12 @@ namespace { ss->reduction = reduction(improving, depth, moveCount); if ( (!PvNode && cutNode) - || ( History[pos.piece_on(to_sq(move))][to_sq(move)] < VALUE_ZERO + || ( thisThread->History[pos.piece_on(to_sq(move))][to_sq(move)] < VALUE_ZERO && CounterMovesHistory[pos.piece_on(prevMoveSq)][prevMoveSq] [pos.piece_on(to_sq(move))][to_sq(move)] <= VALUE_ZERO)) ss->reduction += ONE_PLY; - if ( History[pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO + if ( thisThread->History[pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO && CounterMovesHistory[pos.piece_on(prevMoveSq)][prevMoveSq] [pos.piece_on(to_sq(move))][to_sq(move)] > VALUE_ZERO) ss->reduction = std::max(DEPTH_ZERO, ss->reduction - ONE_PLY); @@ -968,10 +1004,8 @@ namespace { ss->reduction = std::max(DEPTH_ZERO, ss->reduction - ONE_PLY); Depth d = std::max(newDepth - ss->reduction, ONE_PLY); - if (SpNode) - alpha = splitPoint->alpha; - value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); + value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); doFullDepthSearch = (value > alpha && ss->reduction != DEPTH_ZERO); ss->reduction = DEPTH_ZERO; @@ -982,13 +1016,10 @@ namespace { // Step 16. Full depth search, when LMR is skipped or fails high if (doFullDepthSearch) { - if (SpNode) - alpha = splitPoint->alpha; - value = newDepth < ONE_PLY ? givesCheck ? -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) : -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) - : - search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + : - search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail @@ -1002,7 +1033,7 @@ namespace { value = newDepth < ONE_PLY ? givesCheck ? 
-qsearch(pos, ss+1, -beta, -alpha, DEPTH_ZERO) : -qsearch(pos, ss+1, -beta, -alpha, DEPTH_ZERO) - : - search(pos, ss+1, -beta, -alpha, newDepth, false); + : - search(pos, ss+1, -beta, -alpha, newDepth, false); } // Step 17. Undo move @@ -1011,22 +1042,15 @@ namespace { assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); // Step 18. Check for new best move - if (SpNode) - { - splitPoint->spinlock.acquire(); - bestValue = splitPoint->bestValue; - alpha = splitPoint->alpha; - } - // Finished searching the move. If a stop or a cutoff occurred, the return // value of the search cannot be trusted, and we return immediately without // updating best move, PV and TT. - if (Signals.stop || thisThread->cutoff_occurred()) + if (Signals.stop) return VALUE_ZERO; if (RootNode) { - RootMove& rm = *std::find(RootMoves.begin(), RootMoves.end(), move); + RootMove& rm = *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); // PV move or new best move ? if (moveCount == 1 || value > alpha) @@ -1042,7 +1066,7 @@ namespace { // We record how often the best move has been changed in each // iteration. This information is used for time management: When // the best move changes frequently, we allocate some more time. - if (moveCount > 1) + if (moveCount > 1 && thisThread == Threads.main()) ++BestMoveChanges; } else @@ -1054,7 +1078,7 @@ namespace { if (value > bestValue) { - bestValue = SpNode ? splitPoint->bestValue = value : value; + bestValue = value; if (value > alpha) { @@ -1064,54 +1088,25 @@ namespace { && (move != EasyMove.get(pos.key()) || moveCount > 1)) EasyMove.clear(); - bestMove = SpNode ? splitPoint->bestMove = move : move; + bestMove = move; if (PvNode && !RootNode) // Update pv even in fail-high case - update_pv(SpNode ? splitPoint->ss->pv : ss->pv, move, (ss+1)->pv); + update_pv(ss->pv, move, (ss+1)->pv); if (PvNode && value < beta) // Update alpha! Always alpha < beta - alpha = SpNode ? 
splitPoint->alpha = value : value; + alpha = value; else { assert(value >= beta); // Fail high - - if (SpNode) - splitPoint->cutoff = true; - break; } } } - if (!SpNode && !captureOrPromotion && move != bestMove && quietCount < 64) + if (!captureOrPromotion && move != bestMove && quietCount < 64) quietsSearched[quietCount++] = move; - - // Step 19. Check for splitting the search - if ( !SpNode - && Threads.size() >= 2 - && depth >= Threads.minimumSplitDepth - && ( !thisThread->activeSplitPoint - || !thisThread->activeSplitPoint->allSlavesSearching - || ( Threads.size() > MAX_SLAVES_PER_SPLITPOINT - && thisThread->activeSplitPoint->slavesMask.count() == MAX_SLAVES_PER_SPLITPOINT)) - && thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD) - { - assert(bestValue > -VALUE_INFINITE && bestValue < beta); - - thisThread->split(pos, ss, alpha, beta, &bestValue, &bestMove, - depth, moveCount, &mp, NT, cutNode); - - if (Signals.stop || thisThread->cutoff_occurred()) - return VALUE_ZERO; - - if (bestValue >= beta) - break; - } } - if (SpNode) - return bestValue; - // Following condition would detect a stop or a cutoff set only after move // loop has been completed. But in this case bestValue is valid because we // have fully searched our subtree, and we can anyhow save the result in TT. @@ -1262,7 +1257,7 @@ namespace { // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions and checks (only if depth >= DEPTH_QS_CHECKS) will // be generated. 
- MovePicker mp(pos, ttMove, depth, History, CounterMovesHistory, to_sq((ss-1)->currentMove)); + MovePicker mp(pos, ttMove, depth, pos.this_thread()->History, CounterMovesHistory, to_sq((ss-1)->currentMove)); CheckInfo ci(pos); // Loop through the moves until no moves remain or a beta cutoff occurs @@ -1413,23 +1408,25 @@ namespace { ss->killers[0] = move; } + Thread *th = pos.this_thread(); // Shorthand + Value bonus = Value((depth / ONE_PLY) * (depth / ONE_PLY)); Square prevSq = to_sq((ss-1)->currentMove); HistoryStats& cmh = CounterMovesHistory[pos.piece_on(prevSq)][prevSq]; - History.updateH(pos.moved_piece(move), to_sq(move), bonus); + th->History.updateH(pos.moved_piece(move), to_sq(move), bonus); if (is_ok((ss-1)->currentMove)) { - Countermoves.update(pos.piece_on(prevSq), prevSq, move); + th->Countermoves.update(pos.piece_on(prevSq), prevSq, move); cmh.updateCMH(pos.moved_piece(move), to_sq(move), bonus); } // Decrease all the other played quiet moves for (int i = 0; i < quietsCnt; ++i) { - History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); + th->History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); if (is_ok((ss-1)->currentMove)) cmh.updateCMH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); @@ -1451,10 +1448,11 @@ namespace { Move Skill::pick_best(size_t multiPV) { // PRNG sequence should be non-deterministic, so we seed it with the time at init + MainThread* mth = Threads.main(); // Shorthand static PRNG rng(now()); // RootMoves are already sorted by score in descending order - int variance = std::min(RootMoves[0].score - RootMoves[multiPV - 1].score, PawnValueMg); + int variance = std::min(mth->rootMoves[0].score - mth->rootMoves[multiPV - 1].score, PawnValueMg); int weakness = 120 - 2 * level; int maxScore = -VALUE_INFINITE; @@ -1464,13 +1462,13 @@ namespace { for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = ( weakness * int(RootMoves[0].score - RootMoves[i].score) + int push = 
( weakness * int(mth->rootMoves[0].score - mth->rootMoves[i].score) + variance * (rng.rand() % weakness)) / 128; - if (RootMoves[i].score + push > maxScore) + if (mth->rootMoves[i].score + push > maxScore) { - maxScore = RootMoves[i].score + push; - best = RootMoves[i].pv[0]; + maxScore = mth->rootMoves[i].score + push; + best = mth->rootMoves[i].pv[0]; } } return best; @@ -1484,24 +1482,22 @@ namespace { string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { + Thread* thisThread = pos.this_thread(); std::stringstream ss; int elapsed = Time.elapsed() + 1; - size_t multiPV = std::min((size_t)Options["MultiPV"], RootMoves.size()); - int selDepth = 0; - - for (Thread* th : Threads) - if (th->maxPly > selDepth) - selDepth = th->maxPly; + size_t multiPV = std::min((size_t)Options["MultiPV"], thisThread->rootMoves.size()); + int selDepth = thisThread->maxPly; + uint64_t nodes = Threads.nodes_searched(); for (size_t i = 0; i < multiPV; ++i) { - bool updated = (i <= PVIdx); + bool updated = (i <= thisThread->PVIdx); if (depth == ONE_PLY && !updated) continue; Depth d = updated ? depth : depth - ONE_PLY; - Value v = updated ? RootMoves[i].score : RootMoves[i].previousScore; + Value v = updated ? thisThread->rootMoves[i].score : thisThread->rootMoves[i].previousScore; bool tb = TB::RootInTB && abs(v) < VALUE_MATE - MAX_PLY; v = tb ? TB::Score : v; @@ -1515,11 +1511,11 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " multipv " << i + 1 << " score " << UCI::value(v); - if (!tb && i == PVIdx) + if (!tb && i == thisThread->PVIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? 
" upperbound" : ""); - ss << " nodes " << pos.nodes_searched() - << " nps " << pos.nodes_searched() * 1000 / elapsed; + ss << " nodes " << nodes + << " nps " << nodes * 1000 / elapsed; if (elapsed > 1000) // Earlier makes little sense ss << " hashfull " << TT.hashfull(); @@ -1528,7 +1524,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " time " << elapsed << " pv"; - for (Move m : RootMoves[i].pv) + for (Move m : thisThread->rootMoves[i].pv) ss << " " << UCI::move(m, pos.is_chess960()); } @@ -1593,140 +1589,24 @@ bool RootMove::extract_ponder_from_tt(Position& pos) void Thread::idle_loop() { - // Pointer 'this_sp' is not null only if we are called from split(), and not - // at the thread creation. This means we are the split point's master. - SplitPoint* this_sp = activeSplitPoint; - - assert(!this_sp || (this_sp->master == this && searching)); - - while (!exit && !(this_sp && this_sp->slavesMask.none())) + while (!exit) { // If this thread has been assigned work, launch a search - while (searching) - { - spinlock.acquire(); - - assert(activeSplitPoint); - SplitPoint* sp = activeSplitPoint; - - spinlock.release(); - - Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2) - Position pos(*sp->pos, this); - - std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack)); - ss->splitPoint = sp; - - sp->spinlock.acquire(); - - assert(activePosition == nullptr); - - activePosition = &pos; - - if (sp->nodeType == NonPV) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); + if (searching) + this->id_loop(); - else if (sp->nodeType == PV) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); - - else if (sp->nodeType == Root) - search(pos, ss, sp->alpha, sp->beta, sp->depth, sp->cutNode); - - else - assert(false); - - assert(searching); - - spinlock.acquire(); - - searching = false; - activePosition = nullptr; - - spinlock.release(); - - sp->slavesMask.reset(idx); - sp->allSlavesSearching = false; - 
sp->nodes += pos.nodes_searched(); - - // After releasing the lock we can't access any SplitPoint related data - // in a safe way because it could have been released under our feet by - // the sp master. - sp->spinlock.release(); - - // Try to late join to another split point if none of its slaves has - // already finished. - SplitPoint* bestSp = NULL; - int minLevel = INT_MAX; - - for (Thread* th : Threads) - { - const size_t size = th->splitPointsSize; // Local copy - sp = size ? &th->splitPoints[size - 1] : nullptr; - - if ( sp - && sp->allSlavesSearching - && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT - && can_join(sp)) - { - assert(this != th); - assert(!(this_sp && this_sp->slavesMask.none())); - assert(Threads.size() > 2); - - // Prefer to join to SP with few parents to reduce the probability - // that a cut-off occurs above us, and hence we waste our work. - int level = 0; - for (SplitPoint* p = th->activeSplitPoint; p; p = p->parentSplitPoint) - level++; - - if (level < minLevel) - { - bestSp = sp; - minLevel = level; - } - } - } - - if (bestSp) - { - sp = bestSp; - - // Recheck the conditions under lock protection - sp->spinlock.acquire(); - - if ( sp->allSlavesSearching - && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT) - { - spinlock.acquire(); - - if (can_join(sp)) - { - sp->slavesMask.set(idx); - activeSplitPoint = sp; - searching = true; - } - - spinlock.release(); - } - - sp->spinlock.release(); - } - } - - // If search is finished then sleep, otherwise just yield + // If search is finished then sleep if (!Threads.main()->thinking) { - assert(!this_sp); - std::unique_lock lk(mutex); while (!exit && !Threads.main()->thinking) sleepCondition.wait(lk); } - else - std::this_thread::yield(); // Wait for a new job or for our slaves to finish } } + /// check_time() is called by the timer thread when the timer triggers. 
It is /// used to print debug info and, more importantly, to detect when we are out of /// available time and thus stop the search. @@ -1761,28 +1641,7 @@ void check_time() { else if (Limits.nodes) { - int64_t nodes = RootPos.nodes_searched(); - - // Loop across all split points and sum accumulated SplitPoint nodes plus - // all the currently active positions nodes. - // FIXME: Racy... - for (Thread* th : Threads) - for (size_t i = 0; i < th->splitPointsSize; ++i) - { - SplitPoint& sp = th->splitPoints[i]; - - sp.spinlock.acquire(); - - nodes += sp.nodes; - - for (size_t idx = 0; idx < Threads.size(); ++idx) - if (sp.slavesMask.test(idx) && Threads[idx]->activePosition) - nodes += Threads[idx]->activePosition->nodes_searched(); - - sp.spinlock.release(); - } - - if (nodes >= Limits.nodes) + if ((int64_t)Threads.nodes_searched() >= Limits.nodes) Signals.stop = true; } } diff --git a/src/search.h b/src/search.h index 5ba95fe098a..eaf978eceec 100644 --- a/src/search.h +++ b/src/search.h @@ -101,8 +101,6 @@ typedef std::unique_ptr> StateStackPtr; extern volatile SignalsType Signals; extern LimitsType Limits; -extern RootMoveVector RootMoves; -extern Position RootPos; extern StateStackPtr SetupStates; void init(); diff --git a/src/thread.cpp b/src/thread.cpp index cdb0d5412a5..f23d7820d36 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -82,143 +82,10 @@ Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in searching = false; maxPly = 0; - splitPointsSize = 0; - activeSplitPoint = nullptr; - activePosition = nullptr; idx = Threads.size(); // Starts from 0 } -// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the -// current active split point, or in some ancestor of the split point. 
- -bool Thread::cutoff_occurred() const { - - for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint) - if (sp->cutoff) - return true; - - return false; -} - - -// Thread::can_join() checks whether the thread is available to join the split -// point 'sp'. An obvious requirement is that thread must be idle. With more than -// two threads, this is not sufficient: If the thread is the master of some split -// point, it is only available as a slave for the split points below his active -// one (the "helpful master" concept in YBWC terminology). - -bool Thread::can_join(const SplitPoint* sp) const { - - if (searching) - return false; - - // Make a local copy to be sure it doesn't become zero under our feet while - // testing next condition and so leading to an out of bounds access. - const size_t size = splitPointsSize; - - // No split points means that the thread is available as a slave for any - // other thread otherwise apply the "helpful master" concept if possible. - return !size || splitPoints[size - 1].slavesMask.test(sp->master->idx); -} - - -// Thread::split() does the actual work of distributing the work at a node between -// several available threads. If it does not succeed in splitting the node -// (because no idle threads are available), the function immediately returns. -// If splitting is possible, a SplitPoint object is initialized with all the -// data that must be copied to the helper threads and then helper threads are -// informed that they have been assigned work. This will cause them to instantly -// leave their idle loops and call search(). When all threads have returned from -// search() then split() returns. 
- -void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue, - Move* bestMove, Depth depth, int moveCount, - MovePicker* movePicker, int nodeType, bool cutNode) { - - assert(searching); - assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE); - assert(depth >= Threads.minimumSplitDepth); - assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD); - - // Pick and init the next available split point - SplitPoint& sp = splitPoints[splitPointsSize]; - - sp.spinlock.acquire(); // No contention here until we don't increment splitPointsSize - - sp.master = this; - sp.parentSplitPoint = activeSplitPoint; - sp.slavesMask = 0, sp.slavesMask.set(idx); - sp.depth = depth; - sp.bestValue = *bestValue; - sp.bestMove = *bestMove; - sp.alpha = alpha; - sp.beta = beta; - sp.nodeType = nodeType; - sp.cutNode = cutNode; - sp.movePicker = movePicker; - sp.moveCount = moveCount; - sp.pos = &pos; - sp.nodes = 0; - sp.cutoff = false; - sp.ss = ss; - sp.allSlavesSearching = true; // Must be set under lock protection - - ++splitPointsSize; - activeSplitPoint = &sp; - activePosition = nullptr; - - // Try to allocate available threads - Thread* slave; - - while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT - && (slave = Threads.available_slave(&sp)) != nullptr) - { - slave->spinlock.acquire(); - - if (slave->can_join(activeSplitPoint)) - { - activeSplitPoint->slavesMask.set(slave->idx); - slave->activeSplitPoint = activeSplitPoint; - slave->searching = true; - } - - slave->spinlock.release(); - } - - // Everything is set up. The master thread enters the idle loop, from which - // it will instantly launch a search, because its 'searching' flag is set. - // The thread will return from the idle loop when all slaves have finished - // their work at this split point. 
- sp.spinlock.release(); - - Thread::idle_loop(); // Force a call to base class idle_loop() - - // In the helpful master concept, a master can help only a sub-tree of its - // split point and because everything is finished here, it's not possible - // for the master to be booked. - assert(!searching); - assert(!activePosition); - - // We have returned from the idle loop, which means that all threads are - // finished. Note that decreasing splitPointsSize must be done under lock - // protection to avoid a race with Thread::can_join(). - spinlock.acquire(); - - searching = true; - --splitPointsSize; - activeSplitPoint = sp.parentSplitPoint; - activePosition = &pos; - - spinlock.release(); - - // Split point data cannot be changed now, so no need to lock protect - pos.set_nodes_searched(pos.nodes_searched() + sp.nodes); - *bestMove = sp.bestMove; - *bestValue = sp.bestValue; -} - - // TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds // and then calls check_time(). When not searching, thread sleeps until it's woken up. @@ -260,13 +127,7 @@ void MainThread::idle_loop() { if (!exit) { - searching = true; - Search::think(); - - assert(searching); - - searching = false; } } } @@ -317,7 +178,6 @@ void ThreadPool::exit() { void ThreadPool::read_uci_options() { - minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY; size_t requested = Options["Threads"]; assert(requested > 0); @@ -333,16 +193,14 @@ void ThreadPool::read_uci_options() { } -// ThreadPool::available_slave() tries to find an idle thread which is available -// to join SplitPoint 'sp'. - -Thread* ThreadPool::available_slave(const SplitPoint* sp) const { +// ThreadPool::nodes_searched() returns the number of nodes searched. 
- for (Thread* th : *this) - if (th->can_join(sp)) - return th; +uint64_t ThreadPool::nodes_searched() { - return nullptr; + uint64_t nodes = 0; + for (Thread *th : *this) + nodes += th->pos.nodes_searched(); + return nodes; } @@ -351,13 +209,14 @@ Thread* ThreadPool::available_slave(const SplitPoint* sp) const { void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, StateStackPtr& states) { + main()->join(); Signals.stopOnPonderhit = Signals.firstRootMove = false; Signals.stop = Signals.failedLowAtRoot = false; - RootMoves.clear(); - RootPos = pos; + main()->rootMoves.clear(); + main()->pos = pos; Limits = limits; if (states.get()) // If we don't set a new position, preserve current state { @@ -368,7 +227,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, for (const auto& m : MoveList(pos)) if ( limits.searchmoves.empty() || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) - RootMoves.push_back(RootMove(m)); + main()->rootMoves.push_back(RootMove(m)); main()->thinking = true; main()->notify_one(); // Wake up main thread: 'thinking' must be already set diff --git a/src/thread.h b/src/thread.h index e880b01c6fc..f88dc11f771 100644 --- a/src/thread.h +++ b/src/thread.h @@ -37,53 +37,6 @@ struct Thread; const size_t MAX_THREADS = 128; -const size_t MAX_SPLITPOINTS_PER_THREAD = 8; -const size_t MAX_SLAVES_PER_SPLITPOINT = 4; - -class Spinlock { - - std::atomic_int lock; - -public: - Spinlock() { lock = 1; } // Init here to workaround a bug with MSVC 2013 - void acquire() { - while (lock.fetch_sub(1, std::memory_order_acquire) != 1) - while (lock.load(std::memory_order_relaxed) <= 0) - std::this_thread::yield(); // Be nice to hyperthreading - } - void release() { lock.store(1, std::memory_order_release); } -}; - - -/// SplitPoint struct stores information shared by the threads searching in -/// parallel below the same split point. It is populated at splitting time. 
- -struct SplitPoint { - - // Const data after split point has been setup - const Position* pos; - Search::Stack* ss; - Thread* master; - Depth depth; - Value beta; - int nodeType; - bool cutNode; - - // Const pointers to shared data - MovePicker* movePicker; - SplitPoint* parentSplitPoint; - - // Shared variable data - Spinlock spinlock; - std::bitset slavesMask; - volatile bool allSlavesSearching; - volatile uint64_t nodes; - volatile Value alpha; - volatile Value bestValue; - volatile Move bestMove; - volatile int moveCount; - volatile bool cutoff; -}; /// ThreadBase struct is the base of the hierarchy from where we derive all the @@ -97,7 +50,6 @@ struct ThreadBase : public std::thread { void wait_for(volatile const bool& b); Mutex mutex; - Spinlock spinlock; ConditionVariable sleepCondition; volatile bool exit = false; }; @@ -112,22 +64,22 @@ struct Thread : public ThreadBase { Thread(); virtual void idle_loop(); - bool cutoff_occurred() const; - bool can_join(const SplitPoint* sp) const; + virtual void id_loop(); - void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove, - Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode); - - SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD]; Pawns::Table pawnsTable; Material::Table materialTable; Endgames endgames; - Position* activePosition; - size_t idx; + size_t idx, PVIdx; int maxPly; - SplitPoint* volatile activeSplitPoint; - volatile size_t splitPointsSize; volatile bool searching; + + // Data per thread. 
+ Position pos; + Search::RootMoveVector rootMoves; + Search::Stack stack[MAX_PLY+4]; + HistoryStats History; + MovesStats Countermoves; + Depth rootDepth; }; @@ -138,6 +90,7 @@ struct MainThread : public Thread { virtual void idle_loop(); void join(); volatile bool thinking = true; // Avoid a race with start_thinking() + std::bitset slavesMask; }; struct TimerThread : public ThreadBase { @@ -161,10 +114,8 @@ struct ThreadPool : public std::vector { MainThread* main() { return static_cast(at(0)); } void read_uci_options(); - Thread* available_slave(const SplitPoint* sp) const; void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&); - - Depth minimumSplitDepth; + uint64_t nodes_searched(); TimerThread* timer; }; diff --git a/src/timeman.h b/src/timeman.h index c5390befdff..24632d798cd 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -22,6 +22,7 @@ #include "misc.h" #include "search.h" +#include "thread.h" /// The TimeManagement class computes the optimal time to think depending on /// the maximum available time, the game move number and other parameters. @@ -32,7 +33,7 @@ class TimeManagement { void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; } int available() const { return int(optimumTime * unstablePvFactor * 0.76); } int maximum() const { return maximumTime; } - int elapsed() const { return int(Search::Limits.npmsec ? Search::RootPos.nodes_searched() : now() - start); } + int elapsed() const { return int(Search::Limits.npmsec ? 
Threads.nodes_searched() : now() - start); } int64_t availableNodes; // When in 'nodes as time' mode From 713604a3d480cb894e0acb58086ba2ffa6b23e4a Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Wed, 7 Oct 2015 08:57:25 +0200 Subject: [PATCH 02/21] Fix easy move bench: 8397672 --- src/search.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 61796406c5e..64533edc774 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -345,8 +345,12 @@ void Thread::id_loop() { Value bestValue, alpha, beta, delta; - Move easyMove = EasyMove.get(pos.key()); - EasyMove.clear(); + Move easyMove = MOVE_NONE; + if (this == Threads.main()) + { + easyMove = EasyMove.get(pos.key()); + EasyMove.clear(); + } Stack *ss = stack+2; // To allow referencing (ss-2) and (ss+2) std::memset(stack, 0, 5 * sizeof(Stack)); From b01ad9ba18958a18c2f14f4ffb897e9ac4e52016 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Tue, 6 Oct 2015 21:45:12 +0200 Subject: [PATCH 03/21] Reformat lazy smp code Just a first quick pass. Probably Skill and MultiPV need some work too. No functional change. --- src/movepick.cpp | 4 +- src/movepick.h | 2 +- src/search.cpp | 147 ++++++++++++++++++++++------------------------- src/thread.cpp | 1 - src/thread.h | 3 +- 5 files changed, 74 insertions(+), 83 deletions(-) diff --git a/src/movepick.cpp b/src/movepick.cpp index 1f01aaafc95..ed7c380079d 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -238,8 +238,8 @@ void MovePicker::generate_next_stage() { /// a new pseudo legal move every time it is called, until there are no more moves /// left. It picks the move with the biggest value from a list of generated moves /// taking care not to return the ttMove if it has already been searched. 
-template<> -Move MovePicker::next_move() { + +Move MovePicker::next_move() { Move move; diff --git a/src/movepick.h b/src/movepick.h index b488313a446..d3bca28a7ae 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -92,7 +92,7 @@ class MovePicker { MovePicker(const Position&, Move, const HistoryStats&, const CounterMovesHistoryStats&, Value); MovePicker(const Position&, Move, Depth, const HistoryStats&, const CounterMovesHistoryStats&, Move, Search::Stack*); - template Move next_move(); + Move next_move(); private: template void score(); diff --git a/src/search.cpp b/src/search.cpp index 64533edc774..50a58baf685 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -106,7 +106,7 @@ namespace { assert(newPv.size() >= 3); - // Keep track of how many times in a row 3rd ply remains stable + // Keep track of how many times in a row 3rd ply remains stable stableCnt = (newPv[2] == pv[2]) ? stableCnt + 1 : 0; if (!std::equal(newPv.begin(), newPv.begin() + 3, pv)) @@ -182,7 +182,8 @@ void Search::reset () { TT.clear(); CounterMovesHistory.clear(); - for (Thread* th : Threads) { + for (Thread* th : Threads) + { th->History.clear(); th->Countermoves.clear(); } @@ -225,9 +226,10 @@ template uint64_t Search::perft(Position& pos, Depth depth); void Search::think() { - MainThread* mth = Threads.main(); // Shorthand - Color us = mth->pos.side_to_move(); - Time.init(Limits, us, mth->pos.game_ply(), now()); + Position& rootPos = Threads.main()->pos; + Search::RootMoveVector& rootMoves = Threads.main()->rootMoves; + Color us = rootPos.side_to_move(); + Time.init(Limits, us, rootPos.game_ply(), now()); int contempt = Options["Contempt"] * PawnValueEg / 100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); @@ -246,21 +248,21 @@ void Search::think() { TB::ProbeDepth = DEPTH_ZERO; } - if (mth->rootMoves.empty()) + if (rootMoves.empty()) { - mth->rootMoves.push_back(RootMove(MOVE_NONE)); + rootMoves.push_back(RootMove(MOVE_NONE)); sync_cout << "info depth 0 score " - << 
UCI::value(mth->pos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) << sync_endl; } else { - if (TB::Cardinality >= mth->pos.count(WHITE) - + mth->pos.count(BLACK)) + if (TB::Cardinality >= rootPos.count(WHITE) + + rootPos.count(BLACK)) { // If the current root position is in the tablebases then RootMoves // contains only moves that preserve the draw or win. - TB::RootInTB = Tablebases::root_probe(mth->pos, mth->rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe(rootPos, rootMoves, TB::Score); if (TB::RootInTB) TB::Cardinality = 0; // Do not probe tablebases during the search @@ -268,7 +270,7 @@ void Search::think() { else // If DTZ tables are missing, use WDL tables as a fallback { // Filter out moves that do not preserve a draw or win - TB::RootInTB = Tablebases::root_probe_wdl(mth->pos, mth->rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe_wdl(rootPos, rootMoves, TB::Score); // Only probe during search if winning if (TB::Score <= VALUE_DRAW) @@ -277,7 +279,7 @@ void Search::think() { if (TB::RootInTB) { - TB::Hits = mth->rootMoves.size(); + TB::Hits = rootMoves.size(); if (!TB::UseRule50) TB::Score = TB::Score > VALUE_DRAW ? VALUE_MATE - MAX_PLY - 1 @@ -286,15 +288,14 @@ void Search::think() { } } - // Prepare the threads. 
for (Thread* th : Threads) { th->maxPly = 0; - th->rootDepth = DEPTH_ZERO; - if (th != mth) { - Position pos(mth->pos, th); - th->pos = pos; - th->rootMoves = mth->rootMoves; + th->depth = DEPTH_ZERO; + if (th != Threads.main()) + { + th->pos = Position(rootPos, th); + th->rootMoves = rootMoves; } th->notify_one(); // Wake up all the threads } @@ -320,22 +321,21 @@ void Search::think() { if (!Signals.stop && (Limits.ponder || Limits.infinite)) { Signals.stopOnPonderhit = true; - mth->pos.this_thread()->wait_for(Signals.stop); + rootPos.this_thread()->wait_for(Signals.stop); } - sync_cout << "bestmove " << UCI::move(mth->rootMoves[0].pv[0], mth->pos.is_chess960()); + sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], rootPos.is_chess960()); - if (mth->rootMoves[0].pv.size() > 1 || mth->rootMoves[0].extract_ponder_from_tt(mth->pos)) - std::cout << " ponder " << UCI::move(mth->rootMoves[0].pv[1], mth->pos.is_chess960()); + if (rootMoves[0].pv.size() > 1 || rootMoves[0].extract_ponder_from_tt(rootPos)) + std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], rootPos.is_chess960()); std::cout << sync_endl; - } -// Thread::id_loop() is the main iterative deepening loop. It calls search() repeatedly -// with increasing depth until the allocated thinking time has been consumed, -// user stops the search, or the maximum search depth is reached. +// Thread::id_loop() is the main iterative deepening loop. It calls search() +// repeatedly with increasing depth until the allocated thinking time has been +// consumed, user stops the search, or the maximum search depth is reached. 
void Thread::id_loop() { @@ -352,8 +352,8 @@ void Thread::id_loop() { EasyMove.clear(); } - Stack *ss = stack+2; // To allow referencing (ss-2) and (ss+2) - std::memset(stack, 0, 5 * sizeof(Stack)); + Stack* ss = stack+2; // To allow referencing (ss-2) and (ss+2) + std::memset(ss-2, 0, 5 * sizeof(Stack)); bestValue = delta = alpha = -VALUE_INFINITE; beta = VALUE_INFINITE; @@ -371,27 +371,23 @@ void Thread::id_loop() { // Iterative deepening loop until requested to stop or target depth reached while (true) { - // Set up our new depth. - - // The main thread modifies other threads rootDepth, - // if it is <= main thread depth. The new depth will take effect - // after the other thread returns to id_loop(). - if (this == Threads.main()) { - ++rootDepth; - for (Thread* th : Threads) { - if (th != this) { - if (th->rootDepth <= rootDepth) { - th->rootDepth = rootDepth + (Depth)(((th->idx - 1) / 2) + 1); - } - } - } - } - else { - // This can cause a thread to search with the same depth for many iterations. - rootDepth = Threads.main()->rootDepth + (Depth)(((this->idx - 1) / 2) + 1); + // Set up our new depth + + // The main thread modifies other threads rootDepth, if it is <= main + // thread depth. The new depth will take effect after the other thread + // returns to id_loop(). 
+ if (this == Threads.main()) + { + ++depth; + for (Thread* th : Threads) + if (th != this && th->depth <= depth) + th->depth = depth + ONE_PLY + Depth(th->idx - 1) / 2; } + else + // This can cause a thread to search with the same depth for many iterations + depth = Threads.main()->depth + ONE_PLY + Depth(this->idx - 1) / 2; - if (rootDepth >= DEPTH_MAX || Signals.stop || (Limits.depth && rootDepth > Limits.depth)) + if (depth >= DEPTH_MAX || Signals.stop || (Limits.depth && depth > Limits.depth)) break; // Age out PV variability metric @@ -407,25 +403,19 @@ void Thread::id_loop() { for (PVIdx = 0; PVIdx < multiPV && !Signals.stop; ++PVIdx) { // Reset aspiration window starting size - if (rootDepth >= 5 * ONE_PLY) + if (depth >= 5 * ONE_PLY) { delta = Value(16); alpha = std::max(rootMoves[PVIdx].previousScore - delta,-VALUE_INFINITE); beta = std::min(rootMoves[PVIdx].previousScore + delta, VALUE_INFINITE); } -/* - sync_cout << "*T" << idx << " - I'm starting iteration " << rootDepth << "."; - std::cout << " We are searching "; - for (Thread* th : Threads) - std::cout << th->rootDepth << " "; - std::cout << "." << sync_endl; -*/ + // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we're not failing // high/low anymore. while (true) { - bestValue = search(pos, ss, alpha, beta, rootDepth, false); + bestValue = search(pos, ss, alpha, beta, depth, false); // Bring the best move to the front. It is critical that sorting // is done with a stable algorithm because all the values but the @@ -447,13 +437,14 @@ void Thread::id_loop() { if (Signals.stop) break; - if (this == Threads.main()) { + if (this == Threads.main()) + { // When failing high/low give some update (without cluttering // the UI) before a re-search. 
if (multiPV == 1 && (bestValue <= alpha || bestValue >= beta) && Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; } // In case of failing low/high increase aspiration window and @@ -462,7 +453,9 @@ void Thread::id_loop() { { beta = (alpha + beta) / 2; alpha = std::max(bestValue - delta, -VALUE_INFINITE); - if (this == Threads.main()) { + + if (this == Threads.main()) + { Signals.failedLowAtRoot = true; Signals.stopOnPonderhit = false; } @@ -491,14 +484,14 @@ void Thread::id_loop() { << " time " << Time.elapsed() << sync_endl; else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, rootDepth, alpha, beta) << sync_endl; + sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; } if (this != Threads.main()) continue; // If skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(rootDepth)) + if (skill.enabled() && skill.time_to_pick(depth)) skill.pick_best(multiPV); // Have we found a "mate in x"? @@ -513,7 +506,7 @@ void Thread::id_loop() { if (!Signals.stop && !Signals.stopOnPonderhit) { // Take some extra time if the best move has changed - if (rootDepth > 4 * ONE_PLY && multiPV == 1) + if (depth > 4 * ONE_PLY && multiPV == 1) Time.pv_instability(BestMoveChanges); // Stop the search if only one legal move is available or all @@ -560,13 +553,13 @@ namespace { TT.new_search(); - // Start search for the other threads. + // Start search for the other threads for (Thread* th : Threads) th->searching = true; Threads.main()->id_loop(); - // Force a quicker exit of fixed depth searches. + // Force a quicker exit of fixed depth searches Signals.stop = true; // Wait until all threads have finished. 
@@ -579,6 +572,7 @@ namespace { size_t multiPV = Options["MultiPV"]; Skill skill(Options["Skill Level"]); + // If skill level is enabled, swap best PV line with the sub-optimal one if (skill.enabled()) std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), @@ -816,7 +810,7 @@ namespace { MovePicker mp(pos, ttMove, thisThread->History, CounterMovesHistory, PieceValue[MG][pos.captured_piece_type()]); CheckInfo ci(pos); - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != MOVE_NONE) if (pos.legal(move, ci.pinned)) { ss->currentMove = move; @@ -865,7 +859,7 @@ namespace { // Step 11. Loop through moves // Loop through all pseudo-legal moves until no moves remain or a beta cutoff occurs - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); @@ -1046,8 +1040,8 @@ namespace { assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); // Step 18. Check for new best move - // Finished searching the move. If a stop or a cutoff occurred, the return - // value of the search cannot be trusted, and we return immediately without + // Finished searching the move. If a stop occurred, the return value of + // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. 
if (Signals.stop) return VALUE_ZERO; @@ -1265,7 +1259,7 @@ namespace { CheckInfo ci(pos); // Loop through the moves until no moves remain or a beta cutoff occurs - while ((move = mp.next_move()) != MOVE_NONE) + while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); @@ -1452,11 +1446,11 @@ namespace { Move Skill::pick_best(size_t multiPV) { // PRNG sequence should be non-deterministic, so we seed it with the time at init - MainThread* mth = Threads.main(); // Shorthand + const Search::RootMoveVector& rootMoves = Threads.main()->rootMoves; static PRNG rng(now()); // RootMoves are already sorted by score in descending order - int variance = std::min(mth->rootMoves[0].score - mth->rootMoves[multiPV - 1].score, PawnValueMg); + int variance = std::min(rootMoves[0].score - rootMoves[multiPV - 1].score, PawnValueMg); int weakness = 120 - 2 * level; int maxScore = -VALUE_INFINITE; @@ -1466,13 +1460,13 @@ namespace { for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = ( weakness * int(mth->rootMoves[0].score - mth->rootMoves[i].score) + int push = ( weakness * int(rootMoves[0].score - rootMoves[i].score) + variance * (rng.rand() % weakness)) / 128; - if (mth->rootMoves[i].score + push > maxScore) + if (rootMoves[i].score + push > maxScore) { - maxScore = mth->rootMoves[i].score + push; - best = mth->rootMoves[i].pv[0]; + maxScore = rootMoves[i].score + push; + best = rootMoves[i].pv[0]; } } return best; @@ -1610,7 +1604,6 @@ void Thread::idle_loop() { } - /// check_time() is called by the timer thread when the timer triggers. It is /// used to print debug info and, more importantly, to detect when we are out of /// available time and thus stop the search. 
diff --git a/src/thread.cpp b/src/thread.cpp index f23d7820d36..14d3bab1ae1 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -209,7 +209,6 @@ uint64_t ThreadPool::nodes_searched() { void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, StateStackPtr& states) { - main()->join(); Signals.stopOnPonderhit = Signals.firstRootMove = false; diff --git a/src/thread.h b/src/thread.h index f88dc11f771..e9f232180d6 100644 --- a/src/thread.h +++ b/src/thread.h @@ -73,13 +73,12 @@ struct Thread : public ThreadBase { int maxPly; volatile bool searching; - // Data per thread. Position pos; Search::RootMoveVector rootMoves; Search::Stack stack[MAX_PLY+4]; HistoryStats History; MovesStats Countermoves; - Depth rootDepth; + Depth depth; }; From caba255a1cb99b31f2e9e7b8255180e23706e468 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Wed, 7 Oct 2015 09:26:53 +0200 Subject: [PATCH 04/21] Retire id_loop Inline its contents instead. No functional change. --- src/search.cpp | 73 +++++++++++++++++++++----------------------------- src/thread.h | 2 +- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 50a58baf685..5af48207447 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -137,7 +137,6 @@ namespace { template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth); - void id_loop(); Value value_to_tt(Value v, int ply); Value value_from_tt(Value v, int ply); void update_pv(Move* pv, Move move, Move* childPv); @@ -303,9 +302,35 @@ void Search::think() { Threads.timer->run = true; Threads.timer->notify_one(); // Start the recurring timer - id_loop(); // Let's start searching ! 
+ BestMoveChanges = 0; + TT.new_search(); + + // Start the threads + for (Thread* th : Threads) + th->searching = true; + + Threads.main()->search(); + + // Stop the threads and timer + Signals.stop = true; Threads.timer->run = false; + + // Wait until all threads have finished + while (Threads.main()->slavesMask != 0) {} + + // Clear any candidate easy move that wasn't stable for the last search + // iterations; the second condition prevents consecutive fast moves. + if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) + EasyMove.clear(); + + size_t multiPV = Options["MultiPV"]; + Skill skill(Options["Skill Level"]); + + // If skill level is enabled, swap best PV line with the sub-optimal one + if (skill.enabled()) + std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), + Threads.main()->rootMoves.end(), skill.best_move(multiPV))); } // When playing in 'nodes as time' mode, subtract the searched nodes from @@ -333,11 +358,11 @@ void Search::think() { } -// Thread::id_loop() is the main iterative deepening loop. It calls search() +// Thread::search() is the main iterative deepening loop. It calls search() // repeatedly with increasing depth until the allocated thinking time has been // consumed, user stops the search, or the maximum search depth is reached. -void Thread::id_loop() { +void Thread::search() { Threads.main()->mutex.lock(); Threads.main()->slavesMask.set(idx); @@ -415,7 +440,7 @@ void Thread::id_loop() { // high/low anymore. while (true) { - bestValue = search(pos, ss, alpha, beta, depth, false); + bestValue = ::search(pos, ss, alpha, beta, depth, false); // Bring the best move to the front. 
It is critical that sorting // is done with a stable algorithm because all the values but the @@ -542,44 +567,8 @@ void Thread::id_loop() { } - namespace { - // id_loop() calls Thread::id_loop() - - void id_loop() { - - BestMoveChanges = 0; - - TT.new_search(); - - // Start search for the other threads - for (Thread* th : Threads) - th->searching = true; - - Threads.main()->id_loop(); - - // Force a quicker exit of fixed depth searches - Signals.stop = true; - - // Wait until all threads have finished. - while (Threads.main()->slavesMask != 0) {} - - // Clear any candidate easy move that wasn't stable for the last search - // iterations; the second condition prevents consecutive fast moves. - if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) - EasyMove.clear(); - - size_t multiPV = Options["MultiPV"]; - Skill skill(Options["Skill Level"]); - - // If skill level is enabled, swap best PV line with the sub-optimal one - if (skill.enabled()) - std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), - Threads.main()->rootMoves.end(), skill.best_move(multiPV))); - } - - // search<>() is the main search function for both PV and non-PV nodes and for // normal and SplitPoint nodes. 
When called just after a split point the search // is simpler because we have already probed the hash table, done a null move @@ -1591,7 +1580,7 @@ void Thread::idle_loop() { { // If this thread has been assigned work, launch a search if (searching) - this->id_loop(); + this->search(); // If search is finished then sleep if (!Threads.main()->thinking) diff --git a/src/thread.h b/src/thread.h index e9f232180d6..360a0c395c7 100644 --- a/src/thread.h +++ b/src/thread.h @@ -64,7 +64,7 @@ struct Thread : public ThreadBase { Thread(); virtual void idle_loop(); - virtual void id_loop(); + void search(); Pawns::Table pawnsTable; Material::Table materialTable; From 32d2c4e12b145d2bdb8f47c03ba7d0e6e318a4c1 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Wed, 7 Oct 2015 09:31:53 +0200 Subject: [PATCH 05/21] Move Thread::idle_loop() where it belongs No functional change. --- src/search.cpp | 21 --------------------- src/thread.cpp | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 5af48207447..4dd459ae6ef 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1572,27 +1572,6 @@ bool RootMove::extract_ponder_from_tt(Position& pos) } -/// Thread::idle_loop() is where the thread is parked when it has no work to do - -void Thread::idle_loop() { - - while (!exit) - { - // If this thread has been assigned work, launch a search - if (searching) - this->search(); - - // If search is finished then sleep - if (!Threads.main()->thinking) - { - std::unique_lock lk(mutex); - while (!exit && !Threads.main()->thinking) - sleepCondition.wait(lk); - } - } -} - - /// check_time() is called by the timer thread when the timer triggers. It is /// used to print debug info and, more importantly, to detect when we are out of /// available time and thus stop the search. 
diff --git a/src/thread.cpp b/src/thread.cpp index 14d3bab1ae1..ac4c170eb3a 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -106,6 +106,27 @@ void TimerThread::idle_loop() { } +// Thread::idle_loop() is where the thread is parked when it has no work to do + +void Thread::idle_loop() { + + while (!exit) + { + // If this thread has been assigned work, launch a search + if (searching) + this->search(); + + // If search is finished then sleep + if (!Threads.main()->thinking) + { + std::unique_lock lk(mutex); + while (!exit && !Threads.main()->thinking) + sleepCondition.wait(lk); + } + } +} + + // MainThread::idle_loop() is where the main thread is parked waiting to be started // when there is a new search. The main thread will launch all the slave threads. From 9c587288da70b86d7f5eccbc9a9bb5891f121317 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Wed, 7 Oct 2015 15:38:39 +0200 Subject: [PATCH 06/21] Simplify locking Retire slavesMask. We don't need it. It si enough 'searching' and 'thinking' flags. Further simplification is still possible, perhaps we could use a single flag. No functional change. 
--- src/search.cpp | 22 +++++++--------------- src/thread.cpp | 18 ++++++++++++------ src/thread.h | 2 +- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 4dd459ae6ef..45dc7e64736 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -291,12 +291,13 @@ void Search::think() { { th->maxPly = 0; th->depth = DEPTH_ZERO; + th->searching = true; if (th != Threads.main()) { th->pos = Position(rootPos, th); th->rootMoves = rootMoves; + th->notify_one(); // Wake up the thread and start searching } - th->notify_one(); // Wake up all the threads } Threads.timer->run = true; @@ -306,10 +307,6 @@ void Search::think() { TT.new_search(); - // Start the threads - for (Thread* th : Threads) - th->searching = true; - Threads.main()->search(); // Stop the threads and timer @@ -317,7 +314,9 @@ void Search::think() { Threads.timer->run = false; // Wait until all threads have finished - while (Threads.main()->slavesMask != 0) {} + for (Thread* th : Threads) + if (th != Threads.main()) + th->wait_while(th->searching); // Clear any candidate easy move that wasn't stable for the last search // iterations; the second condition prevents consecutive fast moves. 
@@ -346,7 +345,7 @@ void Search::think() { if (!Signals.stop && (Limits.ponder || Limits.infinite)) { Signals.stopOnPonderhit = true; - rootPos.this_thread()->wait_for(Signals.stop); + Threads.main()->wait_for(Signals.stop); } sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], rootPos.is_chess960()); @@ -364,10 +363,6 @@ void Search::think() { void Thread::search() { - Threads.main()->mutex.lock(); - Threads.main()->slavesMask.set(idx); - Threads.main()->mutex.unlock(); - Value bestValue, alpha, beta, delta; Move easyMove = MOVE_NONE; @@ -560,10 +555,7 @@ void Thread::search() { } searching = false; - - Threads.main()->mutex.lock(); - Threads.main()->slavesMask.reset(idx); - Threads.main()->mutex.unlock(); + notify_one(); // Wake up main if is sleeping waiting for us } diff --git a/src/thread.cpp b/src/thread.cpp index ac4c170eb3a..558cc5ef4b8 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -75,6 +75,15 @@ void ThreadBase::wait_for(volatile const bool& condition) { } +// ThreadBase::wait_while() set the thread to sleep until 'condition' turns false + +void ThreadBase::wait_while(volatile const bool& condition) { + + std::unique_lock lk(mutex); + sleepCondition.wait(lk, [&]{ return !condition; }); +} + + // Thread c'tor makes some init but does not launch any execution thread that // will be started only when c'tor returns. 
@@ -112,10 +121,6 @@ void Thread::idle_loop() { while (!exit) { - // If this thread has been assigned work, launch a search - if (searching) - this->search(); - // If search is finished then sleep if (!Threads.main()->thinking) { @@ -123,6 +128,9 @@ void Thread::idle_loop() { while (!exit && !Threads.main()->thinking) sleepCondition.wait(lk); } + + if (!exit && searching) + this->search(); } } @@ -147,9 +155,7 @@ void MainThread::idle_loop() { lk.unlock(); if (!exit) - { Search::think(); - } } } diff --git a/src/thread.h b/src/thread.h index 360a0c395c7..0ba76971610 100644 --- a/src/thread.h +++ b/src/thread.h @@ -48,6 +48,7 @@ struct ThreadBase : public std::thread { virtual void idle_loop() = 0; void notify_one(); void wait_for(volatile const bool& b); + void wait_while(volatile const bool& b); Mutex mutex; ConditionVariable sleepCondition; @@ -89,7 +90,6 @@ struct MainThread : public Thread { virtual void idle_loop(); void join(); volatile bool thinking = true; // Avoid a race with start_thinking() - std::bitset slavesMask; }; struct TimerThread : public ThreadBase { From 309cc4fcd8b83bc7240442547e7d0b1f1abb28be Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 8 Oct 2015 08:20:47 +0200 Subject: [PATCH 07/21] Further reformat lazy smp No functional change. 
--- src/search.cpp | 68 ++++++++++++++++++++++++-------------------------- src/search.h | 1 - src/thread.cpp | 8 +++--- src/thread.h | 5 ++-- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 45dc7e64736..08bfc63e6c4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -40,7 +40,6 @@ namespace Search { volatile SignalsType Signals; LimitsType Limits; StateStackPtr SetupStates; - CounterMovesHistoryStats CounterMovesHistory; } namespace Tablebases { @@ -130,6 +129,7 @@ namespace { EasyMoveManager EasyMove; double BestMoveChanges; Value DrawValue[COLOR_NB]; + CounterMovesHistoryStats CounterMovesHistory; template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); @@ -219,16 +219,14 @@ uint64_t Search::perft(Position& pos, Depth depth) { template uint64_t Search::perft(Position& pos, Depth depth); -/// Search::think() is the external interface to Stockfish's search, and is -/// called by the main thread when the program receives the UCI 'go' command. It -/// searches from RootPos and at the end prints the "bestmove" to output. +/// MainThread::think() is called by the main thread when the program receives +/// the UCI 'go' command. It searches from root position and at the end prints +/// the "bestmove" to output. -void Search::think() { +void MainThread::think() { - Position& rootPos = Threads.main()->pos; - Search::RootMoveVector& rootMoves = Threads.main()->rootMoves; - Color us = rootPos.side_to_move(); - Time.init(Limits, us, rootPos.game_ply(), now()); + Color us = pos.side_to_move(); + Time.init(Limits, us, pos.game_ply(), now()); int contempt = Options["Contempt"] * PawnValueEg / 100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); @@ -251,17 +249,17 @@ void Search::think() { { rootMoves.push_back(RootMove(MOVE_NONE)); sync_cout << "info depth 0 score " - << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << UCI::value(pos.checkers() ? 
-VALUE_MATE : VALUE_DRAW) << sync_endl; } else { - if (TB::Cardinality >= rootPos.count(WHITE) - + rootPos.count(BLACK)) + if (TB::Cardinality >= pos.count(WHITE) + + pos.count(BLACK)) { // If the current root position is in the tablebases then RootMoves // contains only moves that preserve the draw or win. - TB::RootInTB = Tablebases::root_probe(rootPos, rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe(pos, rootMoves, TB::Score); if (TB::RootInTB) TB::Cardinality = 0; // Do not probe tablebases during the search @@ -269,7 +267,7 @@ void Search::think() { else // If DTZ tables are missing, use WDL tables as a fallback { // Filter out moves that do not preserve a draw or win - TB::RootInTB = Tablebases::root_probe_wdl(rootPos, rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe_wdl(pos, rootMoves, TB::Score); // Only probe during search if winning if (TB::Score <= VALUE_DRAW) @@ -292,9 +290,9 @@ void Search::think() { th->maxPly = 0; th->depth = DEPTH_ZERO; th->searching = true; - if (th != Threads.main()) + if (th != this) { - th->pos = Position(rootPos, th); + th->pos = Position(pos, th); th->rootMoves = rootMoves; th->notify_one(); // Wake up the thread and start searching } @@ -303,19 +301,15 @@ void Search::think() { Threads.timer->run = true; Threads.timer->notify_one(); // Start the recurring timer - BestMoveChanges = 0; - - TT.new_search(); - - Threads.main()->search(); + search(true); // Here we go! 
- // Stop the threads and timer + // Stop the threads and the timer Signals.stop = true; Threads.timer->run = false; // Wait until all threads have finished for (Thread* th : Threads) - if (th != Threads.main()) + if (th != this) th->wait_while(th->searching); // Clear any candidate easy move that wasn't stable for the last search @@ -328,8 +322,8 @@ void Search::think() { // If skill level is enabled, swap best PV line with the sub-optimal one if (skill.enabled()) - std::swap(Threads.main()->rootMoves[0], *std::find(Threads.main()->rootMoves.begin(), - Threads.main()->rootMoves.end(), skill.best_move(multiPV))); + std::swap(rootMoves[0], *std::find(rootMoves.begin(), + rootMoves.end(), skill.best_move(multiPV))); } // When playing in 'nodes as time' mode, subtract the searched nodes from @@ -345,13 +339,13 @@ void Search::think() { if (!Signals.stop && (Limits.ponder || Limits.infinite)) { Signals.stopOnPonderhit = true; - Threads.main()->wait_for(Signals.stop); + wait(Signals.stop); } - sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], rootPos.is_chess960()); + sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], pos.is_chess960()); - if (rootMoves[0].pv.size() > 1 || rootMoves[0].extract_ponder_from_tt(rootPos)) - std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], rootPos.is_chess960()); + if (rootMoves[0].pv.size() > 1 || rootMoves[0].extract_ponder_from_tt(pos)) + std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], pos.is_chess960()); std::cout << sync_endl; } @@ -361,15 +355,17 @@ void Search::think() { // repeatedly with increasing depth until the allocated thinking time has been // consumed, user stops the search, or the maximum search depth is reached. 
-void Thread::search() { +void Thread::search(bool isMainThread) { Value bestValue, alpha, beta, delta; Move easyMove = MOVE_NONE; - if (this == Threads.main()) + if (isMainThread) { easyMove = EasyMove.get(pos.key()); EasyMove.clear(); + BestMoveChanges = 0; + TT.new_search(); } Stack* ss = stack+2; // To allow referencing (ss-2) and (ss+2) @@ -396,7 +392,7 @@ void Thread::search() { // The main thread modifies other threads rootDepth, if it is <= main // thread depth. The new depth will take effect after the other thread // returns to id_loop(). - if (this == Threads.main()) + if (isMainThread) { ++depth; for (Thread* th : Threads) @@ -411,7 +407,7 @@ void Thread::search() { break; // Age out PV variability metric - if (this == Threads.main()) + if (isMainThread) BestMoveChanges *= 0.5; // Save the last iteration's scores before first PV line is searched and @@ -457,7 +453,7 @@ void Thread::search() { if (Signals.stop) break; - if (this == Threads.main()) + if (isMainThread) { // When failing high/low give some update (without cluttering // the UI) before a re-search. 
@@ -474,7 +470,7 @@ void Thread::search() { beta = (alpha + beta) / 2; alpha = std::max(bestValue - delta, -VALUE_INFINITE); - if (this == Threads.main()) + if (isMainThread) { Signals.failedLowAtRoot = true; Signals.stopOnPonderhit = false; @@ -507,7 +503,7 @@ void Thread::search() { sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; } - if (this != Threads.main()) + if (!isMainThread) continue; // If skill level is enabled and time is up, pick a sub-optimal best move diff --git a/src/search.h b/src/search.h index eaf978eceec..81f7179cacc 100644 --- a/src/search.h +++ b/src/search.h @@ -104,7 +104,6 @@ extern LimitsType Limits; extern StateStackPtr SetupStates; void init(); -void think(); void reset(); template uint64_t perft(Position& pos, Depth depth); diff --git a/src/thread.cpp b/src/thread.cpp index 558cc5ef4b8..1994e97fab8 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -66,9 +66,9 @@ void ThreadBase::notify_one() { } -// ThreadBase::wait_for() set the thread to sleep until 'condition' turns true +// ThreadBase::wait() set the thread to sleep until 'condition' turns true -void ThreadBase::wait_for(volatile const bool& condition) { +void ThreadBase::wait(volatile const bool& condition) { std::unique_lock lk(mutex); sleepCondition.wait(lk, [&]{ return condition; }); @@ -130,7 +130,7 @@ void Thread::idle_loop() { } if (!exit && searching) - this->search(); + search(); } } @@ -155,7 +155,7 @@ void MainThread::idle_loop() { lk.unlock(); if (!exit) - Search::think(); + think(); } } diff --git a/src/thread.h b/src/thread.h index 0ba76971610..db242837203 100644 --- a/src/thread.h +++ b/src/thread.h @@ -47,7 +47,7 @@ struct ThreadBase : public std::thread { virtual ~ThreadBase() = default; virtual void idle_loop() = 0; void notify_one(); - void wait_for(volatile const bool& b); + void wait(volatile const bool& b); void wait_while(volatile const bool& b); Mutex mutex; @@ -65,7 +65,7 @@ struct Thread : public ThreadBase { Thread(); virtual void 
idle_loop(); - void search(); + void search(bool isMainThread = false); Pawns::Table pawnsTable; Material::Table materialTable; @@ -89,6 +89,7 @@ struct Thread : public ThreadBase { struct MainThread : public Thread { virtual void idle_loop(); void join(); + void think(); volatile bool thinking = true; // Avoid a race with start_thinking() }; From dee1bd8ebcaecde447143d43118a57fc02ef441c Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 8 Oct 2015 09:26:07 +0200 Subject: [PATCH 08/21] Further tweak locks No functional change. --- src/thread.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/thread.cpp b/src/thread.cpp index 1994e97fab8..050e58c3a71 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -109,7 +109,7 @@ void TimerThread::idle_loop() { lk.unlock(); - if (run) + if (!exit && run) check_time(); } } @@ -121,13 +121,12 @@ void Thread::idle_loop() { while (!exit) { - // If search is finished then sleep - if (!Threads.main()->thinking) - { - std::unique_lock lk(mutex); - while (!exit && !Threads.main()->thinking) - sleepCondition.wait(lk); - } + std::unique_lock lk(mutex); + + while (!Threads.main()->thinking && !exit) + sleepCondition.wait(lk); + + lk.unlock(); if (!exit && searching) search(); @@ -160,7 +159,7 @@ void MainThread::idle_loop() { } -// MainThread::join() waits for main thread to finish the search +// MainThread::join() waits for main thread to finish thinking void MainThread::join() { From 494aeb199daaea34621546532392b1fe146cfb54 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 8 Oct 2015 10:11:16 +0200 Subject: [PATCH 09/21] Fix a crash on exit In Thread::idle_loop() we now access main thread data, namely Threads.main()->thinking So upon exit we have to ensure main thread is the last one to be deleted. It can be easily reproduced setting more then one thread and then quitting. This bug happens also in original lazy_smp but for some reason remains hidden. 
Although a crash, this should not compromise TCEC version because it occurs only upon exiting the engine. No functional change. --- src/thread.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/thread.cpp b/src/thread.cpp index 050e58c3a71..2b94cf7bccc 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -190,8 +190,10 @@ void ThreadPool::exit() { timer = nullptr; for (Thread* th : *this) - delete_thread(th); + if (th != Threads.main()) + delete_thread(th); + delete_thread(Threads.main()); // Must be the last one clear(); // Get rid of stale pointers } From 8a2c8c58cae676b57bd1c2178f519b67f92f717f Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Thu, 8 Oct 2015 16:45:21 +0200 Subject: [PATCH 10/21] Better fix of crash bug Crash is due to slave thread accessing main thread data while exiting. Fix it in the proper way, making slave threads independent from main state. No functional change. --- src/thread.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/thread.cpp b/src/thread.cpp index 2b94cf7bccc..a35f521fa23 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -123,7 +123,7 @@ void Thread::idle_loop() { { std::unique_lock lk(mutex); - while (!Threads.main()->thinking && !exit) + while (!searching && !exit) sleepCondition.wait(lk); lk.unlock(); @@ -190,10 +190,8 @@ void ThreadPool::exit() { timer = nullptr; for (Thread* th : *this) - delete_thread(th); + if (th != Threads.main()) + delete_thread(th); - delete_thread(Threads.main()); // Must be the last one clear(); // Get rid of stale pointers } From f6512a40922d4ee143d81fb68e797a3491a5c596 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Fri, 9 Oct 2015 16:55:25 +0200 Subject: [PATCH 11/21] Move EasyMove logic to its original place And other small fixes. No functional change. 
--- src/search.cpp | 53 ++++++++++++++++++++++---------------------------- src/thread.cpp | 6 +++--- src/thread.h | 2 +- 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 08bfc63e6c4..9b2d705e285 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -311,19 +311,6 @@ void MainThread::think() { for (Thread* th : Threads) if (th != this) th->wait_while(th->searching); - - // Clear any candidate easy move that wasn't stable for the last search - // iterations; the second condition prevents consecutive fast moves. - if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) - EasyMove.clear(); - - size_t multiPV = Options["MultiPV"]; - Skill skill(Options["Skill Level"]); - - // If skill level is enabled, swap best PV line with the sub-optimal one - if (skill.enabled()) - std::swap(rootMoves[0], *std::find(rootMoves.begin(), - rootMoves.end(), skill.best_move(multiPV))); } // When playing in 'nodes as time' mode, subtract the searched nodes from @@ -551,7 +538,20 @@ void Thread::search(bool isMainThread) { } searching = false; - notify_one(); // Wake up main if is sleeping waiting for us + notify_one(); // Wake up main thread if is sleeping waiting for us + + if (!isMainThread) + return; + + // Clear any candidate easy move that wasn't stable for the last search + // iterations; the second condition prevents consecutive fast moves. 
+ if (EasyMove.stableCnt < 6 || Time.elapsed() < Time.available()) + EasyMove.clear(); + + // If skill level is enabled, swap best PV line with the sub-optimal one + if (skill.enabled()) + std::swap(rootMoves[0], *std::find(rootMoves.begin(), + rootMoves.end(), skill.best_move(multiPV))); } @@ -910,9 +910,7 @@ namespace { // Move count based pruning if ( depth < 16 * ONE_PLY && moveCount >= FutilityMoveCounts[improving][depth]) - { continue; - } predictedDepth = newDepth - reduction(improving, depth, moveCount); @@ -930,9 +928,7 @@ namespace { // Prune moves with negative SEE at low depths if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO) - { continue; - } } // Speculative prefetch as early as possible @@ -990,12 +986,10 @@ namespace { // Step 16. Full depth search, when LMR is skipped or fails high if (doFullDepthSearch) - { value = newDepth < ONE_PLY ? givesCheck ? -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) : -qsearch(pos, ss+1, -(alpha+1), -alpha, DEPTH_ZERO) : - search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); - } // For PV nodes only, do a full PV search on the first move or after a fail // high (in the latter case search only if value < beta), otherwise let the @@ -1082,11 +1076,11 @@ namespace { quietsSearched[quietCount++] = move; } - // Following condition would detect a stop or a cutoff set only after move - // loop has been completed. But in this case bestValue is valid because we - // have fully searched our subtree, and we can anyhow save the result in TT. + // Following condition would detect a stop only after move loop has been + // completed. But in this case bestValue is valid because we have fully + // searched our subtree, and we can anyhow save the result in TT. 
/* - if (Signals.stop || thisThread->cutoff_occurred()) + if (Signals.stop) return VALUE_DRAW; */ @@ -1383,25 +1377,24 @@ namespace { ss->killers[0] = move; } - Thread *th = pos.this_thread(); // Shorthand - Value bonus = Value((depth / ONE_PLY) * (depth / ONE_PLY)); Square prevSq = to_sq((ss-1)->currentMove); HistoryStats& cmh = CounterMovesHistory[pos.piece_on(prevSq)][prevSq]; + Thread* thisThread = pos.this_thread(); - th->History.updateH(pos.moved_piece(move), to_sq(move), bonus); + thisThread->History.updateH(pos.moved_piece(move), to_sq(move), bonus); if (is_ok((ss-1)->currentMove)) { - th->Countermoves.update(pos.piece_on(prevSq), prevSq, move); + thisThread->Countermoves.update(pos.piece_on(prevSq), prevSq, move); cmh.updateCMH(pos.moved_piece(move), to_sq(move), bonus); } // Decrease all the other played quiet moves for (int i = 0; i < quietsCnt; ++i) { - th->History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); + thisThread->History.updateH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); if (is_ok((ss-1)->currentMove)) cmh.updateCMH(pos.moved_piece(quiets[i]), to_sq(quiets[i]), -bonus); @@ -1594,7 +1587,7 @@ void check_time() { else if (Limits.nodes) { - if ((int64_t)Threads.nodes_searched() >= Limits.nodes) + if (Threads.nodes_searched() >= Limits.nodes) Signals.stop = true; } } diff --git a/src/thread.cpp b/src/thread.cpp index a35f521fa23..29484273cb1 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -219,11 +219,11 @@ void ThreadPool::read_uci_options() { } -// ThreadPool::nodes_searched() returns the number of nodes searched. 
+// ThreadPool::nodes_searched() returns the number of nodes searched -uint64_t ThreadPool::nodes_searched() { +int64_t ThreadPool::nodes_searched() { - uint64_t nodes = 0; + int64_t nodes = 0; for (Thread *th : *this) nodes += th->pos.nodes_searched(); return nodes; diff --git a/src/thread.h b/src/thread.h index db242837203..0c6cdd5af7e 100644 --- a/src/thread.h +++ b/src/thread.h @@ -115,7 +115,7 @@ struct ThreadPool : public std::vector { MainThread* main() { return static_cast(at(0)); } void read_uci_options(); void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&); - uint64_t nodes_searched(); + int64_t nodes_searched(); TimerThread* timer; }; From 06801f8dca0f03be149d78c6e3fa374c3cca9f3b Mon Sep 17 00:00:00 2001 From: Ivan Ivec Date: Fri, 9 Oct 2015 17:19:26 +0200 Subject: [PATCH 12/21] Use a log formula for depths The SPRT LTC test on 3 threads was like this: LLR: 2.95 (-2.94,2.94) [0.00,5.00] Total: 25653 W: 3730 L: 3521 D: 18402 The ELO STC test on 3 threads was like this: ELO: 6.79 +-3.4 (95%) LOS: 100.0% Total: 13716 W: 2435 L: 2167 D: 9114 The ELO STC test on 23 threads was like this: ELO: 0.77 +-5.3 (95%) LOS: 61.1% Total: 4970 W: 765 L: 754 D: 3451 bench: 8397672 --- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 9b2d705e285..aef6cf2b012 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -384,11 +384,11 @@ void Thread::search(bool isMainThread) { ++depth; for (Thread* th : Threads) if (th != this && th->depth <= depth) - th->depth = depth + ONE_PLY + Depth(th->idx - 1) / 2; + th->depth = depth + Depth(3 * log(1 + th->idx)); } else // This can cause a thread to search with the same depth for many iterations - depth = Threads.main()->depth + ONE_PLY + Depth(this->idx - 1) / 2; + depth = Threads.main()->depth + Depth(3 * log(1 + this->idx)); if (depth >= DEPTH_MAX || Signals.stop || (Limits.depth && depth > Limits.depth)) break; From 
a129fa8ee3e617becb514ac9368354c197c48830 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sat, 10 Oct 2015 07:37:57 +0200 Subject: [PATCH 13/21] Delay waiting for threads to finish Wait after best move is sent so that waiting time is not accounted. No functional change. --- src/search.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index aef6cf2b012..450010a92e2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -306,11 +306,6 @@ void MainThread::think() { // Stop the threads and the timer Signals.stop = true; Threads.timer->run = false; - - // Wait until all threads have finished - for (Thread* th : Threads) - if (th != this) - th->wait_while(th->searching); } // When playing in 'nodes as time' mode, subtract the searched nodes from @@ -335,6 +330,12 @@ void MainThread::think() { std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], pos.is_chess960()); std::cout << sync_endl; + + // Wait until all threads have finished before returning. Best move is already + // sent, so this waiting time is not accounted. + for (Thread* th : Threads) + if (th != this) + th->wait_while(th->searching); } From e854b30c844c7060f39402f2d9dd4c1b76ba18d4 Mon Sep 17 00:00:00 2001 From: mstembera Date: Fri, 9 Oct 2015 14:28:25 -0700 Subject: [PATCH 14/21] Bug fix for MSVC Error error C2440: cannot convert from 'double' to 'Depth' No functional change. 
--- src/search.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 450010a92e2..344a0093803 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -385,11 +385,11 @@ void Thread::search(bool isMainThread) { ++depth; for (Thread* th : Threads) if (th != this && th->depth <= depth) - th->depth = depth + Depth(3 * log(1 + th->idx)); + th->depth = depth + Depth(int(3 * log(1 + th->idx))); } else // This can cause a thread to search with the same depth for many iterations - depth = Threads.main()->depth + Depth(3 * log(1 + this->idx)); + depth = Threads.main()->depth + Depth(int(3 * log(1 + this->idx))); if (depth >= DEPTH_MAX || Signals.stop || (Limits.depth && depth > Limits.depth)) break; From 47b524293087dac5516b30bc63781bb97f8dedf4 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sat, 10 Oct 2015 08:34:29 +0200 Subject: [PATCH 15/21] Set the search start time much earlier The start time is the time point we start our clock and in an ideal case it should be set to the same clock running in the tournament manager when it sends to the engine the 'go' UCI command. Currently it is set at the beginning of MainThread::think() but this means we have not accounted for the time to wake up the thread and the time it takes start_thinking to join() the main thread, possibly waiting for the slave threads to finish. In standard conditions we are talking of very few msecs, but with high number of cores and high time pressure, it is difficult to get a reliable estimate. So move it much earlier, just before processing the 'go' command. 
bench: 8397672 --- src/benchmark.cpp | 1 + src/search.cpp | 2 +- src/search.h | 1 + src/timeman.cpp | 4 ++-- src/timeman.h | 6 +++--- src/uci.cpp | 2 ++ 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index c683a38170d..8f3e6ae1939 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -156,6 +156,7 @@ void benchmark(const Position& current, istream& is) { else { Search::StateStackPtr st; + limits.startTime = now(); Threads.start_thinking(pos, limits, st); Threads.main()->join(); nodes += Threads.nodes_searched(); diff --git a/src/search.cpp b/src/search.cpp index 344a0093803..207616efbaf 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -226,7 +226,7 @@ template uint64_t Search::perft(Position& pos, Depth depth); void MainThread::think() { Color us = pos.side_to_move(); - Time.init(Limits, us, pos.game_ply(), now()); + Time.init(Limits, us, pos.game_ply()); int contempt = Options["Contempt"] * PawnValueEg / 100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); diff --git a/src/search.h b/src/search.h index 81f7179cacc..c7abb9dcfd9 100644 --- a/src/search.h +++ b/src/search.h @@ -88,6 +88,7 @@ struct LimitsType { std::vector searchmoves; int time[COLOR_NB], inc[COLOR_NB], npmsec, movestogo, depth, movetime, mate, infinite, ponder; int64_t nodes; + TimePoint startTime; }; /// The SignalsType struct stores volatile flags updated during the search diff --git a/src/timeman.cpp b/src/timeman.cpp index 7a5db255142..3a4e157f8b1 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -80,7 +80,7 @@ namespace { /// inc > 0 && movestogo == 0 means: x basetime + z increment /// inc > 0 && movestogo != 0 means: x moves in y minutes + z increment -void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoint now) +void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { int minThinkingTime = Options["Minimum Thinking Time"]; int moveOverhead = Options["Move 
Overhead"]; @@ -102,7 +102,7 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply, TimePoi limits.npmsec = npmsec; } - start = now; + startTime = limits.startTime; unstablePvFactor = 1; optimumTime = maximumTime = std::max(limits.time[us], minThinkingTime); diff --git a/src/timeman.h b/src/timeman.h index 24632d798cd..b6eb3485c8d 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -29,16 +29,16 @@ class TimeManagement { public: - void init(Search::LimitsType& limits, Color us, int ply, TimePoint now); + void init(Search::LimitsType& limits, Color us, int ply); void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; } int available() const { return int(optimumTime * unstablePvFactor * 0.76); } int maximum() const { return maximumTime; } - int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - start); } + int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - startTime); } int64_t availableNodes; // When in 'nodes as time' mode private: - TimePoint start; + TimePoint startTime; int optimumTime; int maximumTime; double unstablePvFactor; diff --git a/src/uci.cpp b/src/uci.cpp index 4e56542ab0f..c5dbafae32a 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -112,6 +112,8 @@ namespace { Search::LimitsType limits; string token; + limits.startTime = now(); // As early as possible! + while (is >> token) if (token == "searchmoves") while (is >> token) From 17f96bc6414fa58f9ec1dc8bc57ffeab9d312d7f Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Sun, 11 Oct 2015 03:10:34 +0800 Subject: [PATCH 16/21] Revert "Delay waiting for threads to finish" This reverts commit 35e649d70d6426a311325de221af797f864d171b. We need to be sure all threads are finished before sending 'bestmove'. 
--- src/search.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 207616efbaf..e65fbc91e2a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -306,6 +306,11 @@ void MainThread::think() { // Stop the threads and the timer Signals.stop = true; Threads.timer->run = false; + + // Wait until all threads have finished + for (Thread* th : Threads) + if (th != this) + th->wait_while(th->searching); } // When playing in 'nodes as time' mode, subtract the searched nodes from @@ -330,12 +335,6 @@ void MainThread::think() { std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], pos.is_chess960()); std::cout << sync_endl; - - // Wait until all threads have finished before returning. Best move is already - // sent, so this waiting time is not accounted. - for (Thread* th : Threads) - if (th != this) - th->wait_while(th->searching); } From 7b9e83f6046ee7b5f18cfa4c1899131327da59b2 Mon Sep 17 00:00:00 2001 From: mstembera Date: Mon, 12 Oct 2015 17:40:43 -0700 Subject: [PATCH 17/21] Back to @mbootsector original easy move fix which is the correct one. --- src/search.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/search.cpp b/src/search.cpp index e65fbc91e2a..ea54c44c8b8 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1053,6 +1053,7 @@ namespace { { // If there is an easy move for this position, clear it if unstable if ( PvNode + && thisThread == Threads.main() && EasyMove.get(pos.key()) && (move != EasyMove.get(pos.key()) || moveCount > 1)) EasyMove.clear(); From a75e45b37e8d2d92b77b4295de2519712c0d7d51 Mon Sep 17 00:00:00 2001 From: joergoster Date: Mon, 12 Oct 2015 08:41:29 +0200 Subject: [PATCH 18/21] Simplify altering the search depth of the helper threads As a side-effect, the old iterative deepening loop logic is being restored. 
bench: 8116244 (after rebasing on new master) --- src/search.cpp | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index ea54c44c8b8..d31befca71a 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -372,27 +372,12 @@ void Thread::search(bool isMainThread) { multiPV = std::min(multiPV, rootMoves.size()); // Iterative deepening loop until requested to stop or target depth reached - while (true) + while (++depth < DEPTH_MAX && !Signals.stop && (!Limits.depth || depth <= Limits.depth)) { - // Set up our new depth - - // The main thread modifies other threads rootDepth, if it is <= main - // thread depth. The new depth will take effect after the other thread - // returns to id_loop(). - if (isMainThread) - { - ++depth; - for (Thread* th : Threads) - if (th != this && th->depth <= depth) - th->depth = depth + Depth(int(3 * log(1 + th->idx))); - } - else - // This can cause a thread to search with the same depth for many iterations + // Set up our new depth for the helper threads + if (!isMainThread) depth = Threads.main()->depth + Depth(int(3 * log(1 + this->idx))); - if (depth >= DEPTH_MAX || Signals.stop || (Limits.depth && depth > Limits.depth)) - break; - // Age out PV variability metric if (isMainThread) BestMoveChanges *= 0.5; @@ -479,7 +464,7 @@ void Thread::search(bool isMainThread) { // Sort the PV lines searched so far and update the GUI std::stable_sort(rootMoves.begin(), rootMoves.begin() + PVIdx + 1); - if (this != Threads.main()) + if (!isMainThread) break; if (Signals.stop) From 6f997a83d6cf3a9ddf93768ae2978c37602a67f6 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Sat, 17 Oct 2015 09:00:48 +0200 Subject: [PATCH 19/21] Small code style fixes Among them restore the original rootPos name for the starting position. No functional change. 
--- src/search.cpp | 94 +++++++++++++++++++++++--------------------------- src/thread.cpp | 10 +++--- src/thread.h | 2 +- 3 files changed, 50 insertions(+), 56 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d31befca71a..3bcb7ef3b7e 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -225,8 +225,8 @@ template uint64_t Search::perft(Position& pos, Depth depth); void MainThread::think() { - Color us = pos.side_to_move(); - Time.init(Limits, us, pos.game_ply()); + Color us = rootPos.side_to_move(); + Time.init(Limits, us, rootPos.game_ply()); int contempt = Options["Contempt"] * PawnValueEg / 100; // From centipawns DrawValue[ us] = VALUE_DRAW - Value(contempt); @@ -249,17 +249,17 @@ void MainThread::think() { { rootMoves.push_back(RootMove(MOVE_NONE)); sync_cout << "info depth 0 score " - << UCI::value(pos.checkers() ? -VALUE_MATE : VALUE_DRAW) + << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) << sync_endl; } else { - if (TB::Cardinality >= pos.count(WHITE) - + pos.count(BLACK)) + if (TB::Cardinality >= rootPos.count(WHITE) + + rootPos.count(BLACK)) { // If the current root position is in the tablebases then RootMoves // contains only moves that preserve the draw or win. 
- TB::RootInTB = Tablebases::root_probe(pos, rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe(rootPos, rootMoves, TB::Score); if (TB::RootInTB) TB::Cardinality = 0; // Do not probe tablebases during the search @@ -267,7 +267,7 @@ void MainThread::think() { else // If DTZ tables are missing, use WDL tables as a fallback { // Filter out moves that do not preserve a draw or win - TB::RootInTB = Tablebases::root_probe_wdl(pos, rootMoves, TB::Score); + TB::RootInTB = Tablebases::root_probe_wdl(rootPos, rootMoves, TB::Score); // Only probe during search if winning if (TB::Score <= VALUE_DRAW) @@ -292,7 +292,7 @@ void MainThread::think() { th->searching = true; if (th != this) { - th->pos = Position(pos, th); + th->rootPos = Position(rootPos, th); th->rootMoves = rootMoves; th->notify_one(); // Wake up the thread and start searching } @@ -301,7 +301,7 @@ void MainThread::think() { Threads.timer->run = true; Threads.timer->notify_one(); // Start the recurring timer - search(true); // Here we go! + search(true); // Let's start searching! 
// Stop the threads and the timer Signals.stop = true; @@ -329,10 +329,10 @@ void MainThread::think() { wait(Signals.stop); } - sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], pos.is_chess960()); + sync_cout << "bestmove " << UCI::move(rootMoves[0].pv[0], rootPos.is_chess960()); - if (rootMoves[0].pv.size() > 1 || rootMoves[0].extract_ponder_from_tt(pos)) - std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], pos.is_chess960()); + if (rootMoves[0].pv.size() > 1 || rootMoves[0].extract_ponder_from_tt(rootPos)) + std::cout << " ponder " << UCI::move(rootMoves[0].pv[1], rootPos.is_chess960()); std::cout << sync_endl; } @@ -344,23 +344,23 @@ void MainThread::think() { void Thread::search(bool isMainThread) { + Stack* ss = stack + 2; // To allow referencing (ss-2) and (ss+2) Value bestValue, alpha, beta, delta; - Move easyMove = MOVE_NONE; + + std::memset(ss-2, 0, 5 * sizeof(Stack)); + + bestValue = delta = alpha = -VALUE_INFINITE; + beta = VALUE_INFINITE; + if (isMainThread) { - easyMove = EasyMove.get(pos.key()); + easyMove = EasyMove.get(rootPos.key()); EasyMove.clear(); BestMoveChanges = 0; TT.new_search(); } - Stack* ss = stack+2; // To allow referencing (ss-2) and (ss+2) - std::memset(ss-2, 0, 5 * sizeof(Stack)); - - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; - size_t multiPV = Options["MultiPV"]; Skill skill(Options["Skill Level"]); @@ -374,7 +374,7 @@ void Thread::search(bool isMainThread) { // Iterative deepening loop until requested to stop or target depth reached while (++depth < DEPTH_MAX && !Signals.stop && (!Limits.depth || depth <= Limits.depth)) { - // Set up our new depth for the helper threads + // Set up the new depth for the helper threads if (!isMainThread) depth = Threads.main()->depth + Depth(int(3 * log(1 + this->idx))); @@ -403,7 +403,7 @@ void Thread::search(bool isMainThread) { // high/low anymore. 
while (true) { - bestValue = ::search(pos, ss, alpha, beta, depth, false); + bestValue = ::search(rootPos, ss, alpha, beta, depth, false); // Bring the best move to the front. It is critical that sorting // is done with a stable algorithm because all the values but the @@ -411,13 +411,12 @@ void Thread::search(bool isMainThread) { // and we want to keep the same order for all the moves except the // new PV that goes to the front. Note that in case of MultiPV // search the already searched PV lines are preserved. - std::stable_sort(rootMoves.begin() + PVIdx, rootMoves.end()); // Write PV back to transposition table in case the relevant // entries have been overwritten during the search. for (size_t i = 0; i <= PVIdx; ++i) - rootMoves[i].insert_pv_in_tt(pos); + rootMoves[i].insert_pv_in_tt(rootPos); // If search has been stopped break immediately. Sorting and // writing PV back to TT is safe because RootMoves is still @@ -425,15 +424,13 @@ void Thread::search(bool isMainThread) { if (Signals.stop) break; - if (isMainThread) - { - // When failing high/low give some update (without cluttering - // the UI) before a re-search. - if (multiPV == 1 - && (bestValue <= alpha || bestValue >= beta) - && Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; - } + // When failing high/low give some update (without cluttering + // the UI) before a re-search. + if ( isMainThread + && multiPV == 1 + && (bestValue <= alpha || bestValue >= beta) + && Time.elapsed() > 3000) + sync_cout << UCI::pv(rootPos, depth, alpha, beta) << sync_endl; // In case of failing low/high increase aspiration window and // re-search, otherwise exit the loop. 
@@ -472,7 +469,7 @@ void Thread::search(bool isMainThread) { << " time " << Time.elapsed() << sync_endl; else if (PVIdx + 1 == multiPV || Time.elapsed() > 3000) - sync_cout << UCI::pv(pos, depth, alpha, beta) << sync_endl; + sync_cout << UCI::pv(rootPos, depth, alpha, beta) << sync_endl; } if (!isMainThread) @@ -516,7 +513,7 @@ void Thread::search(bool isMainThread) { } if (rootMoves[0].pv.size() >= 3) - EasyMove.update(pos, rootMoves[0].pv); + EasyMove.update(rootPos, rootMoves[0].pv); else EasyMove.clear(); } @@ -1436,22 +1433,22 @@ namespace { string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { - Thread* thisThread = pos.this_thread(); std::stringstream ss; int elapsed = Time.elapsed() + 1; - size_t multiPV = std::min((size_t)Options["MultiPV"], thisThread->rootMoves.size()); - int selDepth = thisThread->maxPly; - uint64_t nodes = Threads.nodes_searched(); + const Search::RootMoveVector& rootMoves = pos.this_thread()->rootMoves; + size_t PVIdx = pos.this_thread()->PVIdx; + size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); + uint64_t nodes_searched = Threads.nodes_searched(); for (size_t i = 0; i < multiPV; ++i) { - bool updated = (i <= thisThread->PVIdx); + bool updated = (i <= PVIdx); if (depth == ONE_PLY && !updated) continue; Depth d = updated ? depth : depth - ONE_PLY; - Value v = updated ? thisThread->rootMoves[i].score : thisThread->rootMoves[i].previousScore; + Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; bool tb = TB::RootInTB && abs(v) < VALUE_MATE - MAX_PLY; v = tb ? TB::Score : v; @@ -1461,15 +1458,15 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { ss << "info" << " depth " << d / ONE_PLY - << " seldepth " << selDepth + << " seldepth " << pos.this_thread()->maxPly << " multipv " << i + 1 << " score " << UCI::value(v); - if (!tb && i == thisThread->PVIdx) + if (!tb && i == PVIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? 
" upperbound" : ""); - ss << " nodes " << nodes - << " nps " << nodes * 1000 / elapsed; + ss << " nodes " << nodes_searched + << " nps " << nodes_searched * 1000 / elapsed; if (elapsed > 1000) // Earlier makes little sense ss << " hashfull " << TT.hashfull(); @@ -1478,7 +1475,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " time " << elapsed << " pv"; - for (Move m : thisThread->rootMoves[i].pv) + for (Move m : rootMoves[i].pv) ss << " " << UCI::move(m, pos.is_chess960()); } @@ -1571,9 +1568,6 @@ void check_time() { else if (Limits.movetime && elapsed >= Limits.movetime) Signals.stop = true; - else if (Limits.nodes) - { - if (Threads.nodes_searched() >= Limits.nodes) + else if (Limits.nodes && Threads.nodes_searched() >= Limits.nodes) Signals.stop = true; - } } diff --git a/src/thread.cpp b/src/thread.cpp index 29484273cb1..88b45921439 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -223,10 +223,10 @@ void ThreadPool::read_uci_options() { int64_t ThreadPool::nodes_searched() { - int64_t nodes = 0; - for (Thread *th : *this) - nodes += th->pos.nodes_searched(); - return nodes; + int64_t nodes = 0; + for (Thread *th : *this) + nodes += th->rootPos.nodes_searched(); + return nodes; } @@ -241,7 +241,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, Signals.stop = Signals.failedLowAtRoot = false; main()->rootMoves.clear(); - main()->pos = pos; + main()->rootPos = pos; Limits = limits; if (states.get()) // If we don't set a new position, preserve current state { diff --git a/src/thread.h b/src/thread.h index 0c6cdd5af7e..97ed219a67f 100644 --- a/src/thread.h +++ b/src/thread.h @@ -74,7 +74,7 @@ struct Thread : public ThreadBase { int maxPly; volatile bool searching; - Position pos; + Position rootPos; Search::RootMoveVector rootMoves; Search::Stack stack[MAX_PLY+4]; HistoryStats History; From b576f04b26b61435f0cc38541ff95a0be89cc016 Mon Sep 17 00:00:00 2001 From: lucasart Date: Sun, 18 Oct 2015 
07:36:16 +0800 Subject: [PATCH 20/21] Atomic signals Rely on well defined behaviour for message passing, instead of volatile. Two versions have been tested, to make sure this wouldn't cause a slowdown on any platform. v1: Sequentially consistent atomics No measurable regression, despite the extra memory barriers on x86. Even with 15 threads and extreme time pressure, both acting as a magnifying glass: threads=15, tc=2+0.02 ELO: 2.59 +-3.4 (95%) LOS: 93.3% Total: 18132 W: 4113 L: 3978 D: 10041 threads=7, tc=2+0.02 ELO: -1.64 +-3.6 (95%) LOS: 18.8% Total: 16914 W: 4053 L: 4133 D: 8728 v2: Acquire/Release semantics This version generates no extra barriers for x86 (on the hot path). As expected, no regression either, under the same conditions: threads=15, tc=2+0.02 ELO: 2.85 +-3.3 (95%) LOS: 95.4% Total: 19661 W: 4640 L: 4479 D: 10542 threads=7, tc=2+0.02 ELO: 0.23 +-3.5 (95%) LOS: 55.1% Total: 18108 W: 4326 L: 4314 D: 9468 As suggested by Joona, another test at LTC: threads=15, tc=20+0.05 ELO: 0.64 +-2.6 (95%) LOS: 68.3% Total: 20000 W: 3053 L: 3016 D: 13931 Note that, on ARM and PPC, acquire/release will generate barriers, unlike x86. And we don't have ARM and PPC machines on fishtest. This is one of the reasons for testing version 1 as well. The theoretical cost of these barriers is not measurable, as shown by v1 tests. Obviously, we go with v2. No functional change. --- src/search.cpp | 8 ++++---- src/search.h | 9 +++++---- src/thread.cpp | 8 ++++---- src/thread.h | 12 +++++++----- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3bcb7ef3b7e..849924f4e8b 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -37,7 +37,7 @@ namespace Search { - volatile SignalsType Signals; + SignalsType Signals; LimitsType Limits; StateStackPtr SetupStates; } @@ -581,7 +581,7 @@ namespace { if (!RootNode) { // Step 2. 
Check for aborted search and immediate draw - if (Signals.stop || pos.is_draw() || ss->ply >= MAX_PLY) + if (Signals.stop.load(std::memory_order_acquire) || pos.is_draw() || ss->ply >= MAX_PLY) return ss->ply >= MAX_PLY && !inCheck ? evaluate(pos) : DrawValue[pos.side_to_move()]; // Step 3. Mate distance pruning. Even if we mate at the next move our score @@ -835,7 +835,7 @@ namespace { if (RootNode && thisThread == Threads.main()) { - Signals.firstRootMove = (moveCount == 1); + Signals.firstRootMove.store(moveCount == 1, std::memory_order_release); if (Time.elapsed() > 3000) sync_cout << "info depth " << depth / ONE_PLY @@ -996,7 +996,7 @@ namespace { // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. - if (Signals.stop) + if (Signals.stop.load(std::memory_order_acquire)) return VALUE_ZERO; if (RootNode) diff --git a/src/search.h b/src/search.h index c7abb9dcfd9..740063b9a59 100644 --- a/src/search.h +++ b/src/search.h @@ -20,7 +20,8 @@ #ifndef SEARCH_H_INCLUDED #define SEARCH_H_INCLUDED -#include // For std::auto_ptr +#include +#include // For std::unique_ptr #include #include @@ -91,16 +92,16 @@ struct LimitsType { TimePoint startTime; }; -/// The SignalsType struct stores volatile flags updated during the search +/// The SignalsType struct stores atomic flags updated during the search /// typically in an async fashion e.g. to stop the search by the GUI. 
struct SignalsType { - bool stop, stopOnPonderhit, firstRootMove, failedLowAtRoot; + std::atomic stop, stopOnPonderhit, firstRootMove, failedLowAtRoot; }; typedef std::unique_ptr> StateStackPtr; -extern volatile SignalsType Signals; +extern SignalsType Signals; extern LimitsType Limits; extern StateStackPtr SetupStates; diff --git a/src/thread.cpp b/src/thread.cpp index 88b45921439..a0d08ab90ab 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -68,16 +68,16 @@ void ThreadBase::notify_one() { // ThreadBase::wait() set the thread to sleep until 'condition' turns true -void ThreadBase::wait(volatile const bool& condition) { +void ThreadBase::wait(std::atomic& condition) { std::unique_lock lk(mutex); - sleepCondition.wait(lk, [&]{ return condition; }); + sleepCondition.wait(lk, [&]{ return condition.load(std::memory_order_acquire); }); } // ThreadBase::wait_while() set the thread to sleep until 'condition' turns false -void ThreadBase::wait_while(volatile const bool& condition) { +void ThreadBase::wait_while(std::atomic& condition) { std::unique_lock lk(mutex); sleepCondition.wait(lk, [&]{ return !condition; }); @@ -87,7 +87,7 @@ void ThreadBase::wait_while(volatile const bool& condition) { // Thread c'tor makes some init but does not launch any execution thread that // will be started only when c'tor returns. 
-Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in MSVC +Thread::Thread() { searching = false; maxPly = 0; diff --git a/src/thread.h b/src/thread.h index 97ed219a67f..1c4bacc28b1 100644 --- a/src/thread.h +++ b/src/thread.h @@ -44,15 +44,16 @@ const size_t MAX_THREADS = 128; struct ThreadBase : public std::thread { + ThreadBase() { exit = false; } virtual ~ThreadBase() = default; virtual void idle_loop() = 0; void notify_one(); - void wait(volatile const bool& b); - void wait_while(volatile const bool& b); + void wait(std::atomic& b); + void wait_while(std::atomic& b); Mutex mutex; ConditionVariable sleepCondition; - volatile bool exit = false; + std::atomic exit; }; @@ -72,7 +73,7 @@ struct Thread : public ThreadBase { Endgames endgames; size_t idx, PVIdx; int maxPly; - volatile bool searching; + std::atomic searching; Position rootPos; Search::RootMoveVector rootMoves; @@ -87,10 +88,11 @@ struct Thread : public ThreadBase { /// special threads: the main one and the recurring timer. struct MainThread : public Thread { + MainThread() { thinking = true; } // Avoid a race with start_thinking() virtual void idle_loop(); void join(); void think(); - volatile bool thinking = true; // Avoid a race with start_thinking() + std::atomic thinking; }; struct TimerThread : public ThreadBase { From 0c699dcefbf6e6c70768a7a3c01f0ace5d3213b8 Mon Sep 17 00:00:00 2001 From: lucasart Date: Sat, 24 Oct 2015 08:54:19 +0800 Subject: [PATCH 21/21] relaxed loads --- src/search.cpp | 8 ++++---- src/thread.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 527e5d46cef..e544deff345 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -581,7 +581,7 @@ namespace { if (!RootNode) { // Step 2. 
Check for aborted search and immediate draw - if (Signals.stop.load(std::memory_order_acquire) || pos.is_draw() || ss->ply >= MAX_PLY) + if (Signals.stop.load(std::memory_order_relaxed) || pos.is_draw() || ss->ply >= MAX_PLY) return ss->ply >= MAX_PLY && !inCheck ? evaluate(pos) : DrawValue[pos.side_to_move()]; // Step 3. Mate distance pruning. Even if we mate at the next move our score @@ -835,7 +835,7 @@ namespace { if (RootNode && thisThread == Threads.main()) { - Signals.firstRootMove.store(moveCount == 1, std::memory_order_release); + Signals.firstRootMove = moveCount == 1; if (Time.elapsed() > 3000) sync_cout << "info depth " << depth / ONE_PLY @@ -996,7 +996,7 @@ namespace { // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. - if (Signals.stop.load(std::memory_order_acquire)) + if (Signals.stop.load(std::memory_order_relaxed)) return VALUE_ZERO; if (RootNode) @@ -1559,7 +1559,7 @@ void check_time() { { bool stillAtFirstMove = Signals.firstRootMove && !Signals.failedLowAtRoot - && elapsed > Time.available() * 75 / 100; + && elapsed > Time.available() * 3 / 4; if ( stillAtFirstMove || elapsed > Time.maximum() - 2 * TimerThread::Resolution) diff --git a/src/thread.cpp b/src/thread.cpp index f0681be5892..fb04231b44a 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -71,7 +71,7 @@ void ThreadBase::notify_one() { void ThreadBase::wait(std::atomic& condition) { std::unique_lock lk(mutex); - sleepCondition.wait(lk, [&]{ return condition.load(std::memory_order_acquire); }); + sleepCondition.wait(lk, [&]{ return bool(condition); }); }