From 8b3bf5b750f8ed1d66b283deb0c3704f4d36adb5 Mon Sep 17 00:00:00 2001 From: "Keith W. Campbell" Date: Thu, 12 May 2022 15:18:53 -0400 Subject: [PATCH] Fix computation of cache-line size on Power The compiler needs to be told that our uses of dcbz clobber memory; otherwise the optimizer can reasonably assume, in getCacheLineSize(), that each element of buf still has the value (255) assigned by memset(). * reuse getCacheLineSize() in omrcpu_startup() * use uint32_t consistently for cache line size Signed-off-by: Keith W. Campbell --- include_core/omrutil.h | 6 ++-- port/unix/omrcpu.c | 52 ++++++++++------------------------- port/unix_include/omrportpg.h | 2 +- util/omrutil/j9memclr.cpp | 43 ++++++++++++++++++----------- 4 files changed, 46 insertions(+), 57 deletions(-) diff --git a/include_core/omrutil.h b/include_core/omrutil.h index b06dff5196e..498b64e1e15 100644 --- a/include_core/omrutil.h +++ b/include_core/omrutil.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 1991, 2021 IBM Corp. and others + * Copyright (c) 1991, 2022 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -330,9 +330,9 @@ getPageTypeStringWithLeadingSpace(uintptr_t pageFlags); /** * @brief * @param void -* @return uintptr_t +* @return uint32_t */ -uintptr_t getCacheLineSize(void); +uint32_t getCacheLineSize(void); /** diff --git a/port/unix/omrcpu.c b/port/unix/omrcpu.c index 09e93ffc310..cfe66310ea5 100644 --- a/port/unix/omrcpu.c +++ b/port/unix/omrcpu.c @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 1991, 2020 IBM Corp. and others + * Copyright (c) 1991, 2022 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -34,6 +34,7 @@ #include <string.h> #endif #include "omrport.h" +#include "omrutil.h" #if defined(RS6000) || defined (LINUXPPC) || defined (PPC) #include "omrportpriv.h" #include "omrportpg.h" @@ -61,27 +62,10 @@ int32_t omrcpu_startup(struct OMRPortLibrary *portLibrary) { +#if defined(LINUXPPC) || defined(PPC) || defined(RS6000) /* initialize the ppc level 1 cache line size */ -#if defined(RS6000) || defined (LINUXPPC) || defined (PPC) - int32_t ppcCacheLineSize; - - int i; - char buf[1024]; - memset(buf, 255, 1024); - - __asm__( - "dcbz 0, %0" - : /* no outputs */ - :"r"((void *) &buf[512])); - - for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) { - if (buf[i] == 0) { - ppcCacheLineSize++; - } - } - - PPG_mem_ppcCacheLineSize = ppcCacheLineSize; -#endif + PPG_mem_ppcCacheLineSize = getCacheLineSize(); +#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */ return 0; } @@ -113,13 +97,12 @@ omrcpu_shutdown(struct OMRPortLibrary *portLibrary) void omrcpu_flush_icache(struct OMRPortLibrary *portLibrary, void *memoryPointer, uintptr_t byteAmount) { -#if defined(RS6000) || defined (LINUXPPC) || defined (PPC) - - int32_t cacheLineSize = PPG_mem_ppcCacheLineSize; - unsigned char *addr; - unsigned char *limit; - limit = (unsigned char *)(((unsigned long)memoryPointer + (unsigned int)byteAmount + (cacheLineSize - 1)) - / cacheLineSize * cacheLineSize); +#if defined(LINUXPPC) || defined(PPC) || defined(RS6000) + uint32_t cacheLineSize = PPG_mem_ppcCacheLineSize; + unsigned char *addr = NULL; + unsigned char *limit = (unsigned char *) + (((uintptr_t)memoryPointer + byteAmount + (cacheLineSize - 1)) + / cacheLineSize * cacheLineSize); /* for each cache line, do a data cache block flush */ for (addr = (unsigned char *)memoryPointer ; addr < limit; addr += cacheLineSize) { @@ -142,15 +125,14 @@ omrcpu_flush_icache(struct 
OMRPortLibrary *portLibrary, void *memoryPointer, uin __asm__("sync"); __asm__("isync"); -#elif defined(ARM) || defined(AARCH64) /* defined(RS6000) || defined(LINUXPPC) || defined(PPC) */ +#elif defined(AARCH64) || defined(ARM) /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */ #if defined(__GNUC__) // GCC built-in function - __builtin___clear_cache(memoryPointer, (void *)((char *)memoryPointer+byteAmount)); -#else + __builtin___clear_cache(memoryPointer, (void *)((unsigned char *)memoryPointer + byteAmount)); +#else /* defined(__GNUC__) */ #error Not supported #endif /* defined(__GNUC__) */ -#endif /* defined(ARM) || defined(AARCH64) */ - +#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */ } int32_t @@ -177,7 +159,3 @@ omrcpu_get_cache_line_size(struct OMRPortLibrary *portLibrary, int32_t *lineSize } return rc; } - - - - diff --git a/port/unix_include/omrportpg.h b/port/unix_include/omrportpg.h index 72299fad0e7..0f2387d83e5 100644 --- a/port/unix_include/omrportpg.h +++ b/port/unix_include/omrportpg.h @@ -95,7 +95,7 @@ typedef struct OMRPortPlatformGlobals { BOOLEAN globalConverterEnabled; char *si_executableName; #if defined(RS6000) || defined (LINUXPPC) || defined (PPC) - int32_t mem_ppcCacheLineSize; + uint32_t mem_ppcCacheLineSize; #endif #if defined(OMR_CONFIGURABLE_SUSPEND_SIGNAL) int32_t introspect_threadSuspendSignal; diff --git a/util/omrutil/j9memclr.cpp b/util/omrutil/j9memclr.cpp index b16460be5ff..f6d15b9dffe 100644 --- a/util/omrutil/j9memclr.cpp +++ b/util/omrutil/j9memclr.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 1991, 2021 IBM Corp. and others + * Copyright (c) 1991, 2022 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -44,8 +44,8 @@ extern "C" void J9ZERZ10(void *ptr, uintptr_t length); #endif #if defined(AIXPPC) || defined(LINUXPPC) -static uintptr_t cacheLineSize = 0; -#endif +static uint32_t cacheLineSize = 0; +#endif /* defined(AIXPPC) || defined(LINUXPPC) */ #if defined(J9ZOS390) struct IHAPSA { @@ -65,8 +65,7 @@ OMRZeroMemory(void *ptr, uintptr_t length) #if defined(AIXPPC) || defined(LINUXPPC) char *addr = static_cast<char *>(ptr); - char *limit; - uintptr_t localCacheLineSize; + char *limit = NULL; #if defined(LINUXPPC) if (length < 2048) { @@ -80,16 +79,16 @@ OMRZeroMemory(void *ptr, uintptr_t length) cacheLineSize = getCacheLineSize(); } + uint32_t localCacheLineSize = cacheLineSize; + /* Zeroing by dcbz is effective if requested length is at least twice larger then Data Cache Block size */ - if (length < (2 * cacheLineSize)) { + if (length < (2 * localCacheLineSize)) { memset(ptr, 0, (size_t)length); return; } /* VMDESIGN 1314 - Allow the compile to unroll the loop below by avoiding using the global in the loop */ - localCacheLineSize = cacheLineSize; - /* Zero any initial portion to first cache line boundary * Assumed here that size of first portion (from start to aligned address) is smaller then total requested size * This is correct because sizes smaller then 2 * Data Cache Block size are served already @@ -107,7 +106,11 @@ OMRZeroMemory(void *ptr, uintptr_t length) /* dcbz forms a group on POWER4, so there is no reason to unroll */ limit = (char *)(((uintptr_t)ptr + length) & ~(localCacheLineSize - 1)); for (; addr < limit; addr += localCacheLineSize) { - __asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(addr)); + __asm__ __volatile__( + "dcbz 0, %0" + : /* no outputs */ + : "r"(addr) + : /* clobbers */ "memory"); } /* zero final portion smaller than a cache line */ @@ -143,21 +146,29 @@ OMRZeroMemory(void 
*ptr, uintptr_t length) } -uintptr_t +uint32_t getCacheLineSize(void) { #if defined(AIXPPC) || defined (LINUXPPC) char buf[1024]; - uintptr_t i, ppcCacheLineSize; + uint32_t i = 0; + uint32_t ppcCacheLineSize = 0; /* xlc -O3 inlines/unrolls this memset */ - memset(buf, 255, 1024); - __asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(&buf[512])); - for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) { - if (buf[i] == 0) { - ppcCacheLineSize++; + memset(buf, 255, sizeof(buf)); + + __asm__ __volatile__( + "dcbz 0, %0" + : /* no outputs */ + : "r"((void *)&buf[512]) + : /* clobbers */ "memory"); + + for (i = 0, ppcCacheLineSize = 0; i < sizeof(buf); ++i) { + if (0 == buf[i]) { + ppcCacheLineSize += 1; } } + return ppcCacheLineSize; #else return 0;