Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix computation of cache-line size on Power #6515

Merged
merged 1 commit into from
May 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions include_core/omrutil.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2021 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -330,9 +330,9 @@ getPageTypeStringWithLeadingSpace(uintptr_t pageFlags);
/**
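* @brief Determine the data cache-line size in bytes (on AIX/Linux Power, by counting the bytes zeroed by a single dcbz); returns 0 on other platforms.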
* @param void
* @return uintptr_t
* @return uint32_t
*/
uintptr_t getCacheLineSize(void);
uint32_t getCacheLineSize(void);


/**
Expand Down
52 changes: 15 additions & 37 deletions port/unix/omrcpu.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2020 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -34,6 +34,7 @@
#include <string.h>
#endif
#include "omrport.h"
#include "omrutil.h"
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
#include "omrportpriv.h"
#include "omrportpg.h"
Expand Down Expand Up @@ -61,27 +62,10 @@
int32_t
omrcpu_startup(struct OMRPortLibrary *portLibrary)
{
#if defined(LINUXPPC) || defined(PPC) || defined(RS6000)
/* initialize the ppc level 1 cache line size */
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
int32_t ppcCacheLineSize;

int i;
char buf[1024];
memset(buf, 255, 1024);

__asm__(
"dcbz 0, %0"
: /* no outputs */
:"r"((void *) &buf[512]));

for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) {
if (buf[i] == 0) {
ppcCacheLineSize++;
}
}

PPG_mem_ppcCacheLineSize = ppcCacheLineSize;
#endif
PPG_mem_ppcCacheLineSize = getCacheLineSize();
#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */

return 0;
}
Expand Down Expand Up @@ -113,13 +97,12 @@ omrcpu_shutdown(struct OMRPortLibrary *portLibrary)
void
omrcpu_flush_icache(struct OMRPortLibrary *portLibrary, void *memoryPointer, uintptr_t byteAmount)
{
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)

int32_t cacheLineSize = PPG_mem_ppcCacheLineSize;
unsigned char *addr;
unsigned char *limit;
limit = (unsigned char *)(((unsigned long)memoryPointer + (unsigned int)byteAmount + (cacheLineSize - 1))
/ cacheLineSize * cacheLineSize);
#if defined(LINUXPPC) || defined(PPC) || defined(RS6000)
uint32_t cacheLineSize = PPG_mem_ppcCacheLineSize;
unsigned char *addr = NULL;
unsigned char *limit = (unsigned char *)
(((uintptr_t)memoryPointer + byteAmount + (cacheLineSize - 1))
/ cacheLineSize * cacheLineSize);

/* for each cache line, do a data cache block flush */
for (addr = (unsigned char *)memoryPointer ; addr < limit; addr += cacheLineSize) {
Expand All @@ -142,15 +125,14 @@ omrcpu_flush_icache(struct OMRPortLibrary *portLibrary, void *memoryPointer, uin
__asm__("sync");
__asm__("isync");

#elif defined(ARM) || defined(AARCH64) /* defined(RS6000) || defined(LINUXPPC) || defined(PPC) */
#elif defined(AARCH64) || defined(ARM) /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */
#if defined(__GNUC__)
// GCC built-in function
__builtin___clear_cache(memoryPointer, (void *)((char *)memoryPointer+byteAmount));
#else
__builtin___clear_cache(memoryPointer, (void *)((unsigned char *)memoryPointer + byteAmount));
#else /* defined(__GNUC__) */
#error Not supported
#endif /* defined(__GNUC__) */
#endif /* defined(ARM) || defined(AARCH64) */

#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */
}

int32_t
Expand All @@ -177,7 +159,3 @@ omrcpu_get_cache_line_size(struct OMRPortLibrary *portLibrary, int32_t *lineSize
}
return rc;
}




2 changes: 1 addition & 1 deletion port/unix_include/omrportpg.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ typedef struct OMRPortPlatformGlobals {
BOOLEAN globalConverterEnabled;
char *si_executableName;
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
int32_t mem_ppcCacheLineSize;
uint32_t mem_ppcCacheLineSize;
#endif
#if defined(OMR_CONFIGURABLE_SUSPEND_SIGNAL)
int32_t introspect_threadSuspendSignal;
Expand Down
43 changes: 27 additions & 16 deletions util/omrutil/j9memclr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2021 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -44,8 +44,8 @@ extern "C" void J9ZERZ10(void *ptr, uintptr_t length);
#endif

#if defined(AIXPPC) || defined(LINUXPPC)
static uintptr_t cacheLineSize = 0;
#endif
static uint32_t cacheLineSize = 0;
#endif /* defined(AIXPPC) || defined(LINUXPPC) */

#if defined(J9ZOS390)
struct IHAPSA {
Expand All @@ -65,8 +65,7 @@ OMRZeroMemory(void *ptr, uintptr_t length)

#if defined(AIXPPC) || defined(LINUXPPC)
char *addr = static_cast<char*>(ptr);
char *limit;
uintptr_t localCacheLineSize;
char *limit = NULL;

#if defined(LINUXPPC)
if (length < 2048) {
Expand All @@ -80,16 +79,16 @@ OMRZeroMemory(void *ptr, uintptr_t length)
cacheLineSize = getCacheLineSize();
}

uint32_t localCacheLineSize = cacheLineSize;

/* Zeroing by dcbz is effective only if the requested length is at least twice as large as the Data Cache Block size */
if (length < (2 * cacheLineSize)) {
if (length < (2 * localCacheLineSize)) {
memset(ptr, 0, (size_t)length);
return;
}

/* VMDESIGN 1314 - Allow the compiler to unroll the loop below by avoiding using the global in the loop */

localCacheLineSize = cacheLineSize;

/* Zero any initial portion to first cache line boundary
* Assumed here that size of first portion (from start to aligned address) is smaller than total requested size
* This is correct because sizes smaller than 2 * Data Cache Block size are served already
Expand All @@ -107,7 +106,11 @@ OMRZeroMemory(void *ptr, uintptr_t length)
/* dcbz forms a group on POWER4, so there is no reason to unroll */
limit = (char *)(((uintptr_t)ptr + length) & ~(localCacheLineSize - 1));
for (; addr < limit; addr += localCacheLineSize) {
__asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(addr));
__asm__ __volatile__(
"dcbz 0, %0"
: /* no outputs */
: "r"(addr)
: /* clobbers */ "memory");
}

/* zero final portion smaller than a cache line */
Expand Down Expand Up @@ -143,21 +146,29 @@ OMRZeroMemory(void *ptr, uintptr_t length)
}


uintptr_t
uint32_t
getCacheLineSize(void)
{
#if defined(AIXPPC) || defined (LINUXPPC)
char buf[1024];
uintptr_t i, ppcCacheLineSize;
uint32_t i = 0;
uint32_t ppcCacheLineSize = 0;

/* xlc -O3 inlines/unrolls this memset */
memset(buf, 255, 1024);
__asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(&buf[512]));
for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) {
if (buf[i] == 0) {
ppcCacheLineSize++;
memset(buf, 255, sizeof(buf));

__asm__ __volatile__(
"dcbz 0, %0"
: /* no outputs */
: "r"((void *)&buf[512])
: /* clobbers */ "memory");

for (i = 0, ppcCacheLineSize = 0; i < sizeof(buf); ++i) {
if (0 == buf[i]) {
ppcCacheLineSize += 1;
}
}

return ppcCacheLineSize;
#else
return 0;
Expand Down