Skip to content

Commit

Permalink
Merge pull request #6515 from keithc-ca/ppc_cache_line_size
Browse files Browse the repository at this point in the history
Fix computation of cache-line size on Power
  • Loading branch information
jdmpapin authored May 16, 2022
2 parents c862fbb + 8b3bf5b commit c67d754
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 57 deletions.
6 changes: 3 additions & 3 deletions include_core/omrutil.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2021 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -330,9 +330,9 @@ getPageTypeStringWithLeadingSpace(uintptr_t pageFlags);
/**
* @brief
* @param void
* @return uintptr_t
* @return uint32_t
*/
uintptr_t getCacheLineSize(void);
uint32_t getCacheLineSize(void);


/**
Expand Down
52 changes: 15 additions & 37 deletions port/unix/omrcpu.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2020 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -34,6 +34,7 @@
#include <string.h>
#endif
#include "omrport.h"
#include "omrutil.h"
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
#include "omrportpriv.h"
#include "omrportpg.h"
Expand Down Expand Up @@ -61,27 +62,10 @@
int32_t
omrcpu_startup(struct OMRPortLibrary *portLibrary)
{
#if defined(LINUXPPC) || defined(PPC) || defined(RS6000)
/* initialize the ppc level 1 cache line size */
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
int32_t ppcCacheLineSize;

int i;
char buf[1024];
memset(buf, 255, 1024);

__asm__(
"dcbz 0, %0"
: /* no outputs */
:"r"((void *) &buf[512]));

for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) {
if (buf[i] == 0) {
ppcCacheLineSize++;
}
}

PPG_mem_ppcCacheLineSize = ppcCacheLineSize;
#endif
PPG_mem_ppcCacheLineSize = getCacheLineSize();
#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */

return 0;
}
Expand Down Expand Up @@ -113,13 +97,12 @@ omrcpu_shutdown(struct OMRPortLibrary *portLibrary)
void
omrcpu_flush_icache(struct OMRPortLibrary *portLibrary, void *memoryPointer, uintptr_t byteAmount)
{
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)

int32_t cacheLineSize = PPG_mem_ppcCacheLineSize;
unsigned char *addr;
unsigned char *limit;
limit = (unsigned char *)(((unsigned long)memoryPointer + (unsigned int)byteAmount + (cacheLineSize - 1))
/ cacheLineSize * cacheLineSize);
#if defined(LINUXPPC) || defined(PPC) || defined(RS6000)
uint32_t cacheLineSize = PPG_mem_ppcCacheLineSize;
unsigned char *addr = NULL;
unsigned char *limit = (unsigned char *)
(((uintptr_t)memoryPointer + byteAmount + (cacheLineSize - 1))
/ cacheLineSize * cacheLineSize);

/* for each cache line, do a data cache block flush */
for (addr = (unsigned char *)memoryPointer ; addr < limit; addr += cacheLineSize) {
Expand All @@ -142,15 +125,14 @@ omrcpu_flush_icache(struct OMRPortLibrary *portLibrary, void *memoryPointer, uin
__asm__("sync");
__asm__("isync");

#elif defined(ARM) || defined(AARCH64) /* defined(RS6000) || defined(LINUXPPC) || defined(PPC) */
#elif defined(AARCH64) || defined(ARM) /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */
#if defined(__GNUC__)
// GCC built-in function
__builtin___clear_cache(memoryPointer, (void *)((char *)memoryPointer+byteAmount));
#else
__builtin___clear_cache(memoryPointer, (void *)((unsigned char *)memoryPointer + byteAmount));
#else /* defined(__GNUC__) */
#error Not supported
#endif /* defined(__GNUC__) */
#endif /* defined(ARM) || defined(AARCH64) */

#endif /* defined(LINUXPPC) || defined(PPC) || defined(RS6000) */
}

int32_t
Expand All @@ -177,7 +159,3 @@ omrcpu_get_cache_line_size(struct OMRPortLibrary *portLibrary, int32_t *lineSize
}
return rc;
}




2 changes: 1 addition & 1 deletion port/unix_include/omrportpg.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ typedef struct OMRPortPlatformGlobals {
BOOLEAN globalConverterEnabled;
char *si_executableName;
#if defined(RS6000) || defined (LINUXPPC) || defined (PPC)
int32_t mem_ppcCacheLineSize;
uint32_t mem_ppcCacheLineSize;
#endif
#if defined(OMR_CONFIGURABLE_SUSPEND_SIGNAL)
int32_t introspect_threadSuspendSignal;
Expand Down
43 changes: 27 additions & 16 deletions util/omrutil/j9memclr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2021 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -44,8 +44,8 @@ extern "C" void J9ZERZ10(void *ptr, uintptr_t length);
#endif

#if defined(AIXPPC) || defined(LINUXPPC)
static uintptr_t cacheLineSize = 0;
#endif
static uint32_t cacheLineSize = 0;
#endif /* defined(AIXPPC) || defined(LINUXPPC) */

#if defined(J9ZOS390)
struct IHAPSA {
Expand All @@ -65,8 +65,7 @@ OMRZeroMemory(void *ptr, uintptr_t length)

#if defined(AIXPPC) || defined(LINUXPPC)
char *addr = static_cast<char*>(ptr);
char *limit;
uintptr_t localCacheLineSize;
char *limit = NULL;

#if defined(LINUXPPC)
if (length < 2048) {
Expand All @@ -80,16 +79,16 @@ OMRZeroMemory(void *ptr, uintptr_t length)
cacheLineSize = getCacheLineSize();
}

uint32_t localCacheLineSize = cacheLineSize;

/* Zeroing by dcbz is effective if requested length is at least twice the Data Cache Block size */
if (length < (2 * cacheLineSize)) {
if (length < (2 * localCacheLineSize)) {
memset(ptr, 0, (size_t)length);
return;
}

/* VMDESIGN 1314 - Allow the compiler to unroll the loop below by avoiding using the global in the loop */

localCacheLineSize = cacheLineSize;

/* Zero any initial portion to first cache line boundary
* Assumed here that size of first portion (from start to aligned address) is smaller than total requested size
* This is correct because sizes smaller than 2 * Data Cache Block size are served already
Expand All @@ -107,7 +106,11 @@ OMRZeroMemory(void *ptr, uintptr_t length)
/* dcbz forms a group on POWER4, so there is no reason to unroll */
limit = (char *)(((uintptr_t)ptr + length) & ~(localCacheLineSize - 1));
for (; addr < limit; addr += localCacheLineSize) {
__asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(addr));
__asm__ __volatile__(
"dcbz 0, %0"
: /* no outputs */
: "r"(addr)
: /* clobbers */ "memory");
}

/* zero final portion smaller than a cache line */
Expand Down Expand Up @@ -143,21 +146,29 @@ OMRZeroMemory(void *ptr, uintptr_t length)
}


uintptr_t
uint32_t
getCacheLineSize(void)
{
#if defined(AIXPPC) || defined (LINUXPPC)
char buf[1024];
uintptr_t i, ppcCacheLineSize;
uint32_t i = 0;
uint32_t ppcCacheLineSize = 0;

/* xlc -O3 inlines/unrolls this memset */
memset(buf, 255, 1024);
__asm__ __volatile__("dcbz 0,%0" : /* no outputs */ : "r"(&buf[512]));
for (i = 0, ppcCacheLineSize = 0; i < 1024; i++) {
if (buf[i] == 0) {
ppcCacheLineSize++;
memset(buf, 255, sizeof(buf));

__asm__ __volatile__(
"dcbz 0, %0"
: /* no outputs */
: "r"((void *)&buf[512])
: /* clobbers */ "memory");

for (i = 0, ppcCacheLineSize = 0; i < sizeof(buf); ++i) {
if (0 == buf[i]) {
ppcCacheLineSize += 1;
}
}

return ppcCacheLineSize;
#else
return 0;
Expand Down

0 comments on commit c67d754

Please sign in to comment.