Skip to content

Commit

Permalink
CRIU checkpointRestoreTimeDelta adjustment & add time compensation tests
Browse files Browse the repository at this point in the history
Adjusted J9JavaVM->checkpointState.checkpointRestoreTimeDelta and
J9PortLibrary->nanoTimeMonotonicClockDelta calculation;
Added tests for CRIU and non-CRIU scenarios.

Signed-off-by: Jason Feng <[email protected]>
  • Loading branch information
JasonFengJ9 committed Nov 14, 2023
1 parent 5fde7f7 commit 22fef30
Show file tree
Hide file tree
Showing 10 changed files with 447 additions and 33 deletions.
69 changes: 36 additions & 33 deletions runtime/criusupport/criusupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,8 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
VM_VMHelpers::setVMState(currentThread, J9VMSTATE_CRIU_SUPPORT_RESTORE_PHASE_START);
restoreNanoTimeMonotonic = j9time_nano_time();
restoreNanoUTCTime = j9time_current_time_nanos(&success);
vm->checkpointState.lastRestoreTimeMillis = (I_64)(restoreNanoUTCTime/J9TIME_NANOSECONDS_PER_MILLIS);
Trc_CRIU_after_checkpoint(currentThread, restoreNanoTimeMonotonic, restoreNanoUTCTime);
vm->checkpointState.lastRestoreTimeMillis = (I_64)(restoreNanoUTCTime / J9TIME_NANOSECONDS_PER_MILLIS);
Trc_CRIU_after_criu_dump(currentThread, restoreNanoTimeMonotonic, restoreNanoUTCTime, vm->checkpointState.lastRestoreTimeMillis);
if (!syslogFlagNone) {
/* Re-open the system logger, and set options with saved string value. */
j9port_control(J9PORT_CTLDATA_SYSLOG_OPEN, 0);
Expand All @@ -954,36 +954,6 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE, J9NLS_JCL_CRIU_DUMP_FAILED, NULL);
goto wakeJavaThreadsWithExclusiveVMAccess;
}
if (0 == success) {
systemReturnCode = errno;
currentExceptionClass = vm->checkpointState.criuJVMRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE, J9NLS_JCL_CRIU_J9_CURRENT_TIME_NANOS_FAILURE, NULL);
goto wakeJavaThreadsWithExclusiveVMAccess;
}
/* JVM downtime between checkpoint and restore is calculated with j9time_current_time_nanos()
* which is expected to be accurate in scenarios such as host rebooting, CRIU image moving across timezones.
*/
vm->checkpointState.checkpointRestoreTimeDelta = (I_64)(restoreNanoUTCTime - checkpointNanoUTCTime);
if (vm->checkpointState.checkpointRestoreTimeDelta < 0) {
/* A negative value was calculated for checkpointRestoreTimeDelta,
* Trc_CRIU_checkpoint_nano_times & Trc_CRIU_restore_nano_times can be used for further investigation.
* Currently OpenJ9 CRIU only supports 64-bit systems, and IDATA is equivalent to int64_t here.
*/
systemReturnCode = (IDATA)vm->checkpointState.checkpointRestoreTimeDelta;
currentExceptionClass = vm->checkpointState.criuJVMRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE,
J9NLS_JCL_CRIU_NEGATIVE_CHECKPOINT_RESTORE_TIME_DELTA, NULL);
goto wakeJavaThreadsWithExclusiveVMAccess;
}
/* j9time_nano_time() might be based on a different starting point in scenarios
* such as host rebooting, CRIU image moving across timezones.
* The adjustment calculated below is expected to be same as checkpointRestoreTimeDelta
* if there is no change for j9time_nano_time() start point.
* This value might be negative.
*/
portLibrary->nanoTimeMonotonicClockDelta = restoreNanoTimeMonotonic - checkpointNanoTimeMonotonic;
Trc_CRIU_restore_nano_times(currentThread, restoreNanoUTCTime, checkpointNanoUTCTime, vm->checkpointState.checkpointRestoreTimeDelta,
restoreNanoTimeMonotonic, checkpointNanoTimeMonotonic, portLibrary->nanoTimeMonotonicClockDelta);

/* We can only end up here if the CRIU restore was successful */
isAfterCheckpoint = TRUE;
Expand Down Expand Up @@ -1021,6 +991,40 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
goto wakeJavaThreads;
}

restoreNanoTimeMonotonic = j9time_nano_time();
restoreNanoUTCTime = j9time_current_time_nanos(&success);
if (0 == success) {
systemReturnCode = errno;
currentExceptionClass = vm->checkpointState.criuJVMRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE, J9NLS_JCL_CRIU_J9_CURRENT_TIME_NANOS_FAILURE, NULL);
goto wakeJavaThreadsWithExclusiveVMAccess;
}
Trc_CRIU_after_checkpoint(currentThread, restoreNanoTimeMonotonic, restoreNanoUTCTime);
/* JVM downtime between checkpoint and restore is calculated with j9time_current_time_nanos()
* which is expected to be accurate in scenarios such as host rebooting, CRIU image moving across timezones.
*/
vm->checkpointState.checkpointRestoreTimeDelta = (I_64)(restoreNanoUTCTime - checkpointNanoUTCTime);
if (vm->checkpointState.checkpointRestoreTimeDelta < 0) {
/* A negative value was calculated for checkpointRestoreTimeDelta,
* Trc_CRIU_checkpoint_nano_times & Trc_CRIU_restore_nano_times can be used for further investigation.
* Currently OpenJ9 CRIU only supports 64-bit systems, and IDATA is equivalent to int64_t here.
*/
systemReturnCode = (IDATA)vm->checkpointState.checkpointRestoreTimeDelta;
currentExceptionClass = vm->checkpointState.criuJVMRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE,
J9NLS_JCL_CRIU_NEGATIVE_CHECKPOINT_RESTORE_TIME_DELTA, NULL);
goto wakeJavaThreadsWithExclusiveVMAccess;
}
/* j9time_nano_time() might be based on a different starting point in scenarios
* such as host rebooting, CRIU image moving across timezones.
* The adjustment calculated below is expected to be same as checkpointRestoreTimeDelta
* if there is no change for j9time_nano_time() start point.
* This value might be negative.
*/
portLibrary->nanoTimeMonotonicClockDelta = restoreNanoTimeMonotonic - checkpointNanoTimeMonotonic;
Trc_CRIU_restore_nano_times(currentThread, restoreNanoUTCTime, checkpointNanoUTCTime, vm->checkpointState.checkpointRestoreTimeDelta,
restoreNanoTimeMonotonic, checkpointNanoTimeMonotonic, portLibrary->nanoTimeMonotonicClockDelta);

VM_VMHelpers::setVMState(currentThread, J9VMSTATE_CRIU_SUPPORT_RESTORE_PHASE_INTERNAL_HOOKS);

if (FALSE == vmFuncs->jvmRestoreHooks(currentThread)) {
Expand Down Expand Up @@ -1052,7 +1056,6 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
}
}


wakeJavaThreads:
acquireSafeOrExcusiveVMAccess(currentThread, vmFuncs, safePoint);

Expand Down
1 change: 1 addition & 0 deletions runtime/criusupport/j9criu.tdf
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ TraceException=Trc_CRIU_setupJNIFieldIDsAndCRIUAPI_load_criu_failure Overhead=1
TraceException=Trc_CRIU_setupJNIFieldIDsAndCRIUAPI_not_find_criu_methods Overhead=1 Level=1 Template="setupJNIFieldIDsAndCRIUAPI() The JVM could not find critical criu functions in libcriu.so: %zi"

TraceException=Trc_CRIU_checkpointJVMImpl_triggerOneOffJavaDump Overhead=1 Level=1 Template="checkpointJVMImpl triggerOneOffDump() returns %d"
TraceEvent=Trc_CRIU_after_criu_dump Overhead=1 Level=2 Template="After checkpoint criu_dump(), criuRestoreNanoTimeMonotonic (%lld), criuRestoreNanoUTCTime (%llu), lastRestoreTimeMillis (%lld)"
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*******************************************************************************
* Copyright IBM Corp. and others 2023
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
* or the Apache License, Version 2.0 which accompanies this distribution and
* is available at https://www.apache.org/licenses/LICENSE-2.0.
*
* This Source Code may also be made available under the following
* Secondary Licenses when the conditions for such availability set
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
* General Public License, version 2 with the GNU Classpath
* Exception [1] and GNU General Public License, version 2 with the
* OpenJDK Assembly Exception [2].
*
* [1] https://www.gnu.org/software/classpath/license.html
* [2] https://openjdk.org/legal/assembly-exception.html
*
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
*******************************************************************************/
package org.openj9.test.util;

import java.util.ArrayList;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;

/**
* Utility class to check time.
*/
public class TimeUtilities {
public final static long TIME_NANOSECONDS_PER_MILLIS = 1000000;

public static void showThreadCurrentTime(String msgHeader) {
System.out.println(msgHeader + ", current thread name: " + Thread.currentThread().getName() + ", " + new Date()
+ ", System.currentTimeMillis() " + System.currentTimeMillis() + ", System.nanoTime() "
+ System.nanoTime());
}

public static boolean checkElapseTime(String testName, long startMillisTime, long startNanoTime,
long minElapsedMillisTime, long maxElapsedMillisTime, long minElapsedNanoTimeInMillis,
long maxElapsedNanoTimeInMillis) {
boolean result = true;
final long currentMillisTime = System.currentTimeMillis();
final long currentNanoTime = System.nanoTime();
showThreadCurrentTime(testName + " checkElapseTime starts");

final long elapsedMillisTime = currentMillisTime - startMillisTime;
final long elapsedNanoTime = currentNanoTime - startNanoTime;
// Get the smallest value that is greater than or equal to the elapsed nano time
// in milliseconds for the case that the elapsed nano
// time is slightly less than the elapsedMillisTime, such as the elapsedNanoTime
// (1999997603ns) less than elapsedMillisTime * TIME_NANOSECONDS_PER_MILLIS
// (2000000000ns)
// This behaviour was observed in both RI and OpenJ9.
final long elapsedNanoTimeInMillis = elapsedNanoTime / TIME_NANOSECONDS_PER_MILLIS
+ ((elapsedNanoTime % TIME_NANOSECONDS_PER_MILLIS == 0) ? 0 : 1);

System.out.println(testName + ": startMillisTime (" + startMillisTime + "ms) startNanoTime (" + startNanoTime
+ "ns)" + " currentMillisTime (" + currentMillisTime + "ms) currentNanoTime (" + currentNanoTime
+ "ns) elapsedMillisTime (" + elapsedMillisTime + "ms) elapsedNanoTime (" + elapsedNanoTime + "ns)");
if (elapsedMillisTime < minElapsedMillisTime) {
result = false;
System.out.println("FAILED: " + testName + " elapsedMillisTime (" + elapsedMillisTime
+ "ms) should NOT be less than minElapsedMillisTime (" + minElapsedMillisTime + "ms)");
} else if (elapsedMillisTime > maxElapsedMillisTime) {
result = false;
System.out.println("FAILED: " + testName + " elapsedMillisTime (" + elapsedMillisTime
+ "ms) should NOT be greater than maxElapsedMillisTime (" + maxElapsedMillisTime + "ms)");
}
if (elapsedNanoTimeInMillis < minElapsedNanoTimeInMillis) {
result = false;
System.out.println("FAILED: " + testName + " elapsedNanoTimeInMillis (" + elapsedNanoTimeInMillis
+ "ms) should NOT be less than minElapsedNanoTimeInMillis (" + minElapsedNanoTimeInMillis + "ms)");
} else if (elapsedNanoTimeInMillis > maxElapsedNanoTimeInMillis) {
result = false;
System.out.println("FAILED: " + testName + " elapsedNanoTimeInMillis (" + elapsedNanoTimeInMillis
+ "ms) should NOT be greater than maxElapsedNanoTimeInMillis (" + maxElapsedNanoTimeInMillis
+ "ms)");
}
showThreadCurrentTime(testName + " checkElapseTime ends");

return result;
}

private volatile boolean tasksPassed = true;
private volatile int taskRunning = 0;
private volatile int taskStarted = 0;
private volatile int taskExecuted = 0;
private final ArrayList<Timer> timers = new ArrayList<Timer>();

public void timerSchedule(String testName, long startMillisTime, long startNanoTime, long minElapsedMillisTime,
long maxElapsedMillisTime, long minElapsedNanoTimeInMillis, long maxElapsedNanoTimeInMillis, long delay) {
Timer timer = new Timer(testName);
timer.schedule(new TimeTestTask(testName, startMillisTime, startNanoTime, minElapsedMillisTime,
maxElapsedMillisTime, minElapsedNanoTimeInMillis, maxElapsedNanoTimeInMillis), delay);
timers.add(timer);
}

public boolean getResultAndCancelTimers() {
showThreadCurrentTime("getResultAndCancelTimers before Thread.yield() taskRunning = " + taskRunning);
while (taskRunning > 0) {
Thread.yield();
}

showThreadCurrentTime("getResultAndCancelTimers before timer.cancel()");
for (Timer timer : timers) {
timer.cancel();
}

showThreadCurrentTime("TimeTestTask tasksPassed: " + tasksPassed + ", taskStarted: " + taskStarted
+ ", taskExecuted: " + taskExecuted);
return tasksPassed && (taskStarted == taskExecuted);
}

class TimeTestTask extends TimerTask {

private final String testName;
private final long startMillisTime;
private final long startNanoTime;
private final long minElapsedMillisTime;
private final long maxElapsedMillisTime;
private final long minElapsedNanoTimeInMillis;
private final long maxElapsedNanoTimeInMillis;

TimeTestTask(String testName, long startMillisTime, long startNanoTime, long minElapsedMillisTime,
long maxElapsedMillisTime, long minElapsedNanoTimeInMillis, long maxElapsedNanoTimeInMillis) {
this.testName = testName;
this.startMillisTime = startMillisTime;
this.startNanoTime = startNanoTime;
this.minElapsedMillisTime = minElapsedMillisTime;
this.maxElapsedMillisTime = maxElapsedMillisTime;
this.minElapsedNanoTimeInMillis = minElapsedNanoTimeInMillis;
this.maxElapsedNanoTimeInMillis = maxElapsedNanoTimeInMillis;

taskStarted++;
taskRunning++;
}

public void run() {
taskExecuted++;
if (!checkElapseTime(testName, startMillisTime, startNanoTime, minElapsedMillisTime, maxElapsedMillisTime,
minElapsedNanoTimeInMillis, maxElapsedNanoTimeInMillis)) {
tasksPassed = false;
}
taskRunning--;
}
}
}
7 changes: 7 additions & 0 deletions test/functional/cmdLineTests/cmdLineTest_J9tests/build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
<property name="DEST" value="${BUILD_ROOT}/functional/cmdLineTests/cmdLineTest_J9tests" />
<property name="PROJECT_ROOT" location="." />
<property name="src" location="${PROJECT_ROOT}/src" />
<property name="TestUtilities" location="../../TestUtilities/src"/>
<property name="build" location="${PROJECT_ROOT}/bin" />

<target name="init">
Expand All @@ -49,13 +50,19 @@
<echo>===debug: on</echo>
<echo>===destdir: ${DEST}</echo>
<javac srcdir="${src}" destdir="${build}" debug="true" fork="true" executable="${compiler.javac}" includeAntRuntime="false" encoding="ISO-8859-1">
<src path="${src}" />
<src path="${TestUtilities}" />
<classpath>
<pathelement location="${LIB_DIR}/testng.jar"/>
</classpath>
</javac>
</target>

<target name="dist" depends="compile" description="generate the distribution">
<jar jarfile="${DEST}/cmdLineTest_J9tests.jar" filesonly="true">
<fileset dir="${build}" />
<fileset dir="${src}" />
<fileset dir="${TestUtilities}" />
</jar>
<copy todir="${DEST}">
<fileset dir="${PROJECT_ROOT}" includes="*.xml" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>

<!--
Copyright IBM Corp. and others 2023
This program and the accompanying materials are made available under
the terms of the Eclipse Public License 2.0 which accompanies this
distribution and is available at https://www.eclipse.org/legal/epl-2.0/
or the Apache License, Version 2.0 which accompanies this distribution and
is available at https://www.apache.org/licenses/LICENSE-2.0.
This Source Code may also be made available under the following
Secondary Licenses when the conditions for such availability set
forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
General Public License, version 2 with the GNU Classpath
Exception [1] and GNU General Public License, version 2 with the
OpenJDK Assembly Exception [2].
[1] https://www.gnu.org/software/classpath/license.html
[2] https://openjdk.org/legal/assembly-exception.html
SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
-->
<!DOCTYPE suite SYSTEM "cmdlinetester.dtd">

<suite id="Elapsed time measurement" timeout="180">
<test id="Verify the elapsed time">
<command>$EXE$ -cp $Q$$JARPATH$$Q$ ElapsedTime</command>
<output type="success" caseSensitive="yes" regex="no">Thread-0</output>
<output type="success" caseSensitive="yes" regex="no">PASSED: testElapsedTime</output>
<output type="failure" caseSensitive="yes" regex="no">FAILED: testElapsedTime</output>
</test>
</suite>
18 changes: 18 additions & 0 deletions test/functional/cmdLineTests/cmdLineTest_J9tests/playlist.xml
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,22 @@ SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-ex
<impl>openj9</impl>
</impls>
</test>
<test>
<testCaseName>cmdLineTest_J9test_elapsedTime</testCaseName>
<command>$(JAVA_COMMAND) $(CMDLINETESTER_JVM_OPTIONS) \
-DJARPATH=$(Q)$(TESTNG)$(P)$(TEST_RESROOT)$(D)cmdLineTest_J9tests.jar$(Q) \
-DJVMLIBPATH=$(Q)$(J9VM_PATH)$(Q) \
-DTESTDIR=$(Q)$(TEST_RESROOT)$(Q) -DRESJAR=$(CMDLINETESTER_RESJAR) -DEXE=$(SQ)$(JAVA_COMMAND) $(JVM_OPTIONS)$(SQ) \
-jar $(CMDLINETESTER_JAR) \
-config $(Q)$(TEST_RESROOT)$(D)j9tests_elapsedTime.xml$(Q) \
-xids all,$(PLATFORM) -plats all,$(PLATFORM) -xlist $(Q)$(TEST_RESROOT)$(D)j9tests_exclude.xml$(Q) \
-explainExcludes -nonZeroExitWhenError; \
$(TEST_STATUS)</command>
<levels>
<level>sanity</level>
</levels>
<groups>
<group>functional</group>
</groups>
</test>
</playlist>
Loading

0 comments on commit 22fef30

Please sign in to comment.