[DRAFT] Add basic support for compiling to ARM32 (?)

This is what I do to make it compile on Raspbian on my Raspberry Pi 3B+ - GcProbe.S is just copy from ARM64 - __cdecl is not defined on ARM, so I suppress that - I resurrect RareFlags from CoreRT in hopes that this is make it works, but no luck I try to resurrect dotnet/corert@d9847af in changes to regdisp.h but seems to be I do wrong thing here, and I should resurrect definitions from there. I would like to understand is RareFlags are still valid way, and if not what's the proper way. See dotnet#833
kant2002 · Jul 3, 2021 · ee7b781 · ee7b781
1 parent 93503aa
commit ee7b781
Show file tree

Hide file tree

Showing 7 changed files with 294 additions and 3 deletions.
diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h
@@ -155,7 +155,7 @@ inline TADDR GetRegdisplayStackMark(REGDISPLAY *display) {
 
 #elif defined(TARGET_64BIT)
 
-#if defined(TARGET_ARM64)
+#if defined(TARGET_ARM64) || defined(__ARM_HOST)
 typedef struct _Arm64VolatileContextPointer
 {
     union {
@@ -185,7 +185,7 @@ typedef struct _Arm64VolatileContextPointer
 } Arm64VolatileContextPointer;
 #endif //TARGET_ARM64
 struct REGDISPLAY : public REGDISPLAY_BASE {
-#ifdef TARGET_ARM64
+#ifdef TARGET_ARM64 || defined(__ARM_HOST)
     Arm64VolatileContextPointer     volatileCurrContextPointers;
 #endif
 

diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
@@ -647,7 +647,9 @@ inline uint8_t * PalNtCurrentTeb()
 // conditional compilation (upto and including defining an export of functionality that isn't a supported
 // intrinsic on that platform).
 //
-
+#if defined(HOST_ARM)
+#define __cdecl
+#endif
 EXTERN_C void * __cdecl _alloca(size_t);
 #pragma intrinsic(_alloca)
 

diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+    .global RhpGcPoll2
+
+    LEAF_ENTRY RhpGcPoll
+        PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 0
+        cbnz    w0, RhpGcPollRare // TrapThreadsFlags_None = 0
+        ret
+    LEAF_END RhpGcPoll
+
+    NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
+        PUSH_COOP_PINVOKE_FRAME x0
+        bl RhpGcPoll2
+        POP_COOP_PINVOKE_FRAME
+        ret
+    NESTED_END RhpGcPollRare
diff --git a/src/coreclr/nativeaot/Runtime/inc/OptionalFieldDefinitions.h b/src/coreclr/nativeaot/Runtime/inc/OptionalFieldDefinitions.h
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+//
+// This file is designed to be included multiple times with different definitions of the
+// DEFINE_INLINE_OPTIONAL_FIELD macro in order to build data structures
+// related to each type of EEType optional field we support (see OptionalFields.h for details).
+//
+
+// The order of definition of the fields is somewhat important: for types that require multiple optional
+// fields the fields are laid out in the order of definition. Thus access to the fields defined first will be
+// slightly faster than the later fields.
+
+#ifndef DEFINE_INLINE_OPTIONAL_FIELD
+#error Must define DEFINE_INLINE_OPTIONAL_FIELD before including this file
+#endif
+
+//                               Field name                Field type
+DEFINE_INLINE_OPTIONAL_FIELD    (RareFlags,                uint32_t)
+DEFINE_INLINE_OPTIONAL_FIELD    (DispatchMap,              uint32_t)
+DEFINE_INLINE_OPTIONAL_FIELD    (ValueTypeFieldPadding,    uint32_t)
+DEFINE_INLINE_OPTIONAL_FIELD    (NullableValueOffset,      uint8_t)
+
+#undef DEFINE_INLINE_OPTIONAL_FIELD
diff --git a/src/coreclr/nativeaot/Runtime/inc/OptionalFields.h b/src/coreclr/nativeaot/Runtime/inc/OptionalFields.h
@@ -0,0 +1,202 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+//
+// Support for optional fields attached out-of-line to EETypes (or any other data structure for that matter).
+// These should be used for attributes that exist for only a small subset of EETypes or are accessed only
+// rarely. The idea is to avoid bloating the size of the most common EETypes and to move some of the colder
+// data out-of-line to improve the density of the hot data. The basic idea is that the EEType contains a
+// pointer to an OptionalFields structure (which may be NULL) and that structure contains a somewhat
+// compressed version of the optional fields.
+//
+// For each OptionalFields instance we encode only the fields that are present so that the structure is as
+// small as possible while retaining reasonable access costs.
+//
+// This implies some tricky tradeoffs:
+//  * The more we compress the data the greater the access costs in terms of CPU.
+//  * More effective compression schemes tend to lead to the payload data being unaligned. This itself can
+//    result in overhead but on some architectures it's worse than that and the unaligned nature of the data
+//    requires special handling in client code. Generally it would be more robust and clean not to leak out
+//    such requirements to our callers. For small fields we can imagine copying the data into aligned storage
+//    (and indeed that might be a natural part of the decompression process). It might be more problematic for
+//    larger data items.
+//
+// In order to get the best of both worlds we employ a hybrid approach. Small values (typically single small
+// integers) get encoded inline in a compressed format. Decoding them will automatically copy them into
+// aligned storage. Larger values (such as complex data structures) will be stored out-of-line, naturally
+// aligned and uncompressed (at least by this layer of the software). The entry in the optional field record
+// will instead contain a reference to this out-of-line structure.
+//
+// Pointers are large (especially on 64-bit) and incur overhead in terms of base relocs and complexity (since
+// the locations requiring relocs may not be aligned). To mitigate this we can encode references to these
+// out-of-line records as deltas from a base address and by carefully ordering the layout of the out-of-line
+// records we can share the same base address amongst multiple OptionalFields structures.
+//
+// Taking this to one end of the logical extreme we could store a single base address such as the module base
+// address and encode all OptionalFields references as offsets from this; basically RVAs. This is cheap in the
+// respect that we only need one base address (and associated reloc) but the majority of OptionalFields
+// references will encode as fairly large deltas. As we'll touch on later our mechanism for compressing inline
+// values in OptionalRecords is based on discarding insignificant leading zero bits; i.e. we encode small
+// integers more effectively. So ideally we want to store multiple base addresses so we can lower the average
+// encoding cost of the deltas.
+//
+// An additional concern is how these base addresses are located. Take the module base address example: we
+// have no direct means of locating this based on an OptionalFields (or even the EEType that owns it). To
+// obtain this value we're likely to have to perform some operation akin to a range lookup and there are
+// interesting edge cases such as EETypes for generic types, which don't reside in modules.
+//
+// The approach taken here addresses several of the concerns above. The algorithm stores base addresses
+// interleaved with the OptionalFields. They are located at well-known locations by aligning their addresses
+// to a specific value (we can tune this but assume for the purposes of this explanation that the value is 64
+// bytes). This implies that the address requiring a base reloc is always aligned plus it can be located
+// cheaply from an OptionalFields address by masking off the low-order bits of that address.
+//
+// As OptionalFields are added any out-of-line data they reference is stored linearly in the same order (this
+// does imply that all out-of-line records must live in the same section and thus must have the same access
+// attributes). This provides locality: adjacent OptionalFields may encode deltas to different out-of-line
+// records but since the out-of-line records are adjacent (or nearly so) as well, both deltas will be about
+// the same size. Once we've filled in the space between stored base addresses (some padding might be needed
+// near the end where a full OptionalField won't fit, but this should be small given good compression of
+// OptionalFields) then we write out a new base address. This is chosen based on the first out-of-line record
+// referenced by the next OptionalField (i.e. it will make the first delta zero and keep the subsequent ones
+// small).
+//
+// Consider the following example where for the sake of simplicity we assume each OptionalFields structure has
+// precisely one out-of-line reference:
+//
+//    +-----------------+                        Out-of-line Records
+//    | Base Address    |----------------------> +--------------------+
+//    +-----------------+                        | #1                 |
+//    | OptionalFields  |                        +--------------------+
+//    |   Record #1     |                        | #2                 |
+//    |                 |                        |                    |
+//    +-----------------+                        +--------------------+
+//    | OptionalFields  |                        | #3                 |
+//    |   Record #2     |         /------------> +--------------------+
+//    |                 |        /               | #4                 |
+//    +-----------------+       /                |                    |
+//    | OptionalFields  |      /                 |                    |
+//    |   Record #3     |     /                  +--------------------+
+//    |                 |    /                   | #5                 |
+//    +-----------------+   /                    |                    |
+//    | Padding         |  /                     +--------------------+
+//    +-----------------+ /                      :                    :
+//    | Base Address    |-
+//    +-----------------+
+//    | OptionalFields  |
+//    |   Record #4     |
+//    |                 |
+//    +-----------------+
+//    | OptionalFields  |
+//    |   Record #5     |
+//    :                 :
+//
+// Each optional field uses the base address defined above it (at the lower memory address determined by
+// masking off the alignment bits). No matter which out-of-line records they reference the deltas will be as
+// small as we can make them.
+//
+// Lowering the alignment requirement introduces more base addresses and as a result also lowers the number of
+// OptionalFields that share the same base address, leading to smaller encodings for out-of-line deltas. But
+// at the same time it increases the number of pointers (and associated base relocs) that we must store.
+// Additionally the compression of the deltas is not completely linear: certain ranges of delta magnitude will
+// result in exactly the same storage being used when compressed. See the details of the delta encoding below
+// to see how we can use this to our advantage when tuning the alignment of base addresses.
+//
+// We optimize the case where OptionalFields structs don't contain any out-of-line references. We collect
+// those together and emit them in a single run with no interleaved base addresses.
+//
+// The OptionalFields record encoding itself is a byte stream representing one or more fields. The first byte
+// is a field header: it contains a field type tag in the low-order 7 bits (giving us 128 possible field
+// types) and the most significant bit indicates whether this is the last field of the structure. The field
+// value (a 32-bit unsigned number) is encoded using the existing VarInt support which encodes the value in
+// byte chunks taking between 1 and 5 bytes to do so.
+//
+// If the field value is out-of-line we decode the delta from the base address in much the same way as for
+// inline field values. Before adding the delta to the base address, however, we scale it based on the natural
+// alignment of the out-of-line data record it references. Since the out-of-line data is aligned on the same
+// basis this scaling avoids encoding bits that will always be zero and thus allows us to reference a greater
+// range of memory with a delta that encodes using less bytes.
+//
+// The value compression algorithm above gives us the non-linearity of compression referenced earlier. 32-bit
+// values will encode in a given number of bytes based on the having a given number of significant
+// (non-leading zero) bits:
+//      5 bytes : 25 - 32 significant bits
+//      4 bytes : 18 - 24 significant bits
+//      3 bytes : 11 - 17 significant bits
+//      2 bytes : 4 - 10 significant bits
+//      1 byte  : 0 - 3 significant bits
+//
+// We can use this to our advantage when choosing an alignment at which to store base addresses. Assuming that
+// most out-of-line data will have an alignment requirement of at least 4 bytes we note that the 2 byte
+// encoding already gives us an addressable range of 2^10 * 4 == 4KB which is likely to be enough for the vast
+// majority of cases. That is we can raise the granularity of base addresses until the average amount of
+// out-of-line data addressed begins to approach 4KB which lowers the cost of storing the base addresses while
+// not impacting the encoding size of deltas at all (there's no point in storing base addresses more
+// frequently because it won't make the encodings of deltas any smaller).
+//
+// Trying to tune for one byte deltas all the time is probably not worth it. The addressability range (again
+// assuming 4 byte alignment) is only 32 bytes and unless we start storing a lot of small data structures
+// out-of-line tuning for this will involve placing the base addresses very frequently and our costs will be
+// dominated by the size of the base address pointers and their relocs.
+//
+
+// Define enumeration of optional field tags.
+enum OptionalFieldTag
+{
+#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) OFT_##_name,
+#include "OptionalFieldDefinitions.h"
+    OFT_Count // Number of field types we support
+};
+
+// Array that indicates whether a given field type is inline (true) or out-of-line (false).
+static bool g_rgOptionalFieldTypeIsInline[OFT_Count] = {
+#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) true,
+#include "OptionalFieldDefinitions.h"
+};
+
+// Various random global constants we can tweak for performance tuning.
+enum OptionalFieldConstants
+{
+    // Constants determining how often we interleave a "header" containing a base address for out-of-line
+    // records into the stream of OptionalFields structures. These will occur at some power of 2 alignment of
+    // memory address. The alignment must at least exceed that of a pointer (since we'll store a pointer in
+    // the header and we need room for at least one OptionalFields record between each header). As the
+    // alignment goes up we store less headers but may impose a larger one-time padding cost at the start of
+    // the optional fields memory block as well as increasing the average encoding size for out-of-line record
+    // deltas in each optional field record.
+    //
+    // Note that if you change these constants you must be sure to modify the alignment of the optional field
+    // virtual section in ZapImage.cpp as well as ensuring the alignment of the containing physical section is
+    // at least as high (this latter cases matters for the COFF output case only, when we're generating PE
+    // images directly the physical section will get page alignment).
+    OFC_HeaderAlignmentShift    = 7,
+    OFC_HeaderAlignmentBytes    = 1 << OFC_HeaderAlignmentShift,
+    OFC_HeaderAlignmentMask     = OFC_HeaderAlignmentBytes - 1,
+};
+
+typedef DPTR(class OptionalFields) PTR_OptionalFields;
+typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields;
+
+class OptionalFields
+{
+public:
+    // Define accessors for each field type.
+#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type)                       \
+    _type Get##_name(_type defaultValue)                                 \
+    {                                                                    \
+    return (_type)GetInlineField(OFT_##_name, (uint32_t)defaultValue); \
+    }
+
+#include "OptionalFieldDefinitions.h"
+
+private:
+    // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte after
+    // the field header. Advances the field location to the start of the next field.
+    static OptionalFieldTag DecodeFieldTag(PTR_UInt8 * ppFields, bool *pfLastField);
+
+    // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte of a
+    // field description. Advances the field location to the start of the next field.
+    static uint32_t DecodeFieldValue(PTR_UInt8 * ppFields);
+
+    uint32_t GetInlineField(OptionalFieldTag eTag, uint32_t uiDefaultValue);
+};
diff --git a/src/coreclr/nativeaot/Runtime/inc/eetype.h b/src/coreclr/nativeaot/Runtime/inc/eetype.h
@@ -9,6 +9,7 @@
 // Forward declarations
 
 class EEType;
+class OptionalFields;
 class TypeManager;
 struct TypeManagerHandle;
 class DynamicModule;
@@ -106,6 +107,8 @@ enum EETypeField
 // Fundamental runtime type representation
 typedef DPTR(class EEType) PTR_EEType;
 typedef DPTR(PTR_EEType) PTR_PTR_EEType;
+typedef DPTR(class OptionalFields) PTR_OptionalFields;
+typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields;
 
 extern "C" void PopulateDebugHeaders();
 
@@ -335,6 +338,12 @@ class EEType
 
     uint32_t GetHashCode();
 
+    // Retrieve optional fields associated with this EEType. May be NULL if no such fields exist.
+    inline PTR_OptionalFields get_OptionalFields();
+
+    // Get flags that are less commonly set on EETypes.
+    inline uint32_t get_RareFlags();
+
     // Helper methods that deal with EEType topology (size and field layout). These are useful since as we
     // optimize for pay-for-play we increasingly want to customize exactly what goes into an EEType on a
     // per-type basis. The rules that govern this can be both complex and volatile and we risk sprinkling
@@ -356,3 +365,4 @@ class EEType
 
 #pragma warning(pop)
 
+#include "OptionalFields.h"
diff --git a/src/coreclr/nativeaot/Runtime/inc/eetype.inl b/src/coreclr/nativeaot/Runtime/inc/eetype.inl
@@ -86,6 +86,39 @@ inline PTR_UInt8 FollowRelativePointer(const int32_t* pDist)
     return result;
 }
 
+// Retrieve optional fields associated with this EEType. May be NULL if no such fields exist.
+inline PTR_OptionalFields EEType::get_OptionalFields()
+{
+    if ((m_usFlags & OptionalFieldsFlag) == 0)
+        return NULL;
+
+    uint32_t cbOptionalFieldsOffset = GetFieldOffset(ETF_OptionalFieldsPtr);
+
+#if !defined(USE_PORTABLE_HELPERS)
+    if (!IsDynamicType())
+    {
+        return (OptionalFields*)FollowRelativePointer((int32_t*)((uint8_t*)this + cbOptionalFieldsOffset));
+    }
+    else
+#endif
+    {
+        return *(OptionalFields**)((uint8_t*)this + cbOptionalFieldsOffset);
+    }
+}
+
+// Get flags that are less commonly set on EETypes.
+inline uint32_t EEType::get_RareFlags()
+{
+    OptionalFields * pOptFields = get_OptionalFields();
+
+    // If there are no optional fields then none of the rare flags have been set.
+    if (!pOptFields)
+        return 0;
+
+    // Get the flags from the optional fields. The default is zero if that particular field was not included.
+    return pOptFields->GetRareFlags(0);
+}
+
 inline TypeManagerHandle* EEType::GetTypeManagerPtr()
 {
     uint32_t cbOffset = GetFieldOffset(ETF_TypeManagerIndirection);