From f1eda00bf6b065c094f0f1a8c50858a4d9eece3c Mon Sep 17 00:00:00 2001 From: Brian Osman Date: Thu, 1 Nov 2018 12:41:36 -0400 Subject: [PATCH] Remove more unused color functions, and an unused Neon blitter Bug: skia: Change-Id: I7e53fe4bc7cba31629c80a1472d11a8a30fe6a5a Reviewed-on: https://skia-review.googlesource.com/c/167391 Commit-Queue: Brian Osman Commit-Queue: Mike Klein Reviewed-by: Mike Klein --- src/core/SkColorData.h | 114 -------------------------- src/opts/SkBlitMask_opts_arm_neon.cpp | 74 ----------------- 2 files changed, 188 deletions(-) diff --git a/src/core/SkColorData.h b/src/core/SkColorData.h index e46bd25104b9e..295e923e0ad04 100644 --- a/src/core/SkColorData.h +++ b/src/core/SkColorData.h @@ -164,15 +164,6 @@ static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) { return (r * 54 + g * 183 + b * 19) >> 8; } -/** - * Turn a 0..255 value into a 0..256 value, rounding up if the value is >= 0x80. - * This is slightly more accurate than SkAlpha255To256. - */ -static inline unsigned Sk255To256(U8CPU value) { - SkASSERT(SkToU8(value) == value); - return value + (value >> 7); -} - /** Calculates 256 - (value * alpha256) / 255 in range [0,256], * for [0,255] value and [0,256] alpha256. */ @@ -205,89 +196,6 @@ static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) { #define SK_G16_MASK_IN_PLACE (SK_G16_MASK << SK_G16_SHIFT) #define SK_B16_MASK_IN_PLACE (SK_B16_MASK << SK_B16_SHIFT) -/** Expand the 16bit color into a 32bit value that can be scaled all at once - by a value up to 32. Used in conjunction with SkCompact_rgb_16. -*/ -static inline uint32_t SkExpand_rgb_16(U16CPU c) { - SkASSERT(c == (uint16_t)c); - - return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE); -} - -/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit - color value. The computation yields only 16bits of valid data, but we claim - to return 32bits, so that the compiler won't generate extra instructions to - "clean" the top 16bits. However, the top 16 can contain garbage, so it is - up to the caller to safely ignore them. -*/ -static inline U16CPU SkCompact_rgb_16(uint32_t c) { - return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE); -} - -/** Scale the 16bit color value by the 0..256 scale parameter. - The computation yields only 16bits of valid data, but we claim - to return 32bits, so that the compiler won't generate extra instructions to - "clean" the top 16bits. -*/ -static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) { - return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5); -} - -// this helper explicitly returns a clean 16bit value (but slower) -#define SkAlphaMulRGB16_ToU16(c, s) (uint16_t)SkAlphaMulRGB16(c, s) - -/** Blend pre-expanded RGB32 with 16bit color value by the 0..32 scale parameter. - The computation yields only 16bits of valid data, but we claim to return - 32bits, so that the compiler won't generate extra instructions to "clean" - the top 16bits. -*/ -static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) { - uint32_t dst_expand = SkExpand_rgb_16(dst) * scale; - return SkCompact_rgb_16((src_expand + dst_expand) >> 5); -} - -/** Blend src and dst 16bit colors by the 0..256 scale parameter. - The computation yields only 16bits of valid data, but we claim - to return 32bits, so that the compiler won't generate extra instructions to - "clean" the top 16bits. 
-*/
-static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
-    SkASSERT((unsigned)srcScale <= 256);
-
-    srcScale >>= 3;
-
-    uint32_t src32 = SkExpand_rgb_16(src);
-    uint32_t dst32 = SkExpand_rgb_16(dst);
-    return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
-}
-
-static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
-                                int srcScale, int count) {
-    SkASSERT(count > 0);
-    SkASSERT((unsigned)srcScale <= 256);
-
-    srcScale >>= 3;
-
-    do {
-        uint32_t src32 = SkExpand_rgb_16(*src++);
-        uint32_t dst32 = SkExpand_rgb_16(*dst);
-        *dst++ = static_cast<uint16_t>(
-            SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5)));
-    } while (--count > 0);
-}
-
-#ifdef SK_DEBUG
-    static inline U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
-        SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
-        SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
-        SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
-
-        return a + b;
-    }
-#else
-    #define SkRGB16Add(a, b) ((a) + (b))
-#endif
-
 ///////////////////////////////////////////////////////////////////////////////
 
 #ifdef SK_DEBUG
@@ -304,28 +212,6 @@ static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
     #define SkPMColorAssert(c)
 #endif
 
-static inline bool SkPMColorValid(SkPMColor c) {
-    auto a = SkGetPackedA32(c);
-    bool valid = a <= SK_A32_MASK
-              && SkGetPackedR32(c) <= a
-              && SkGetPackedG32(c) <= a
-              && SkGetPackedB32(c) <= a;
-    if (valid) {
-        SkPMColorAssert(c); // Make sure we're consistent when it counts.
-    }
-    return valid;
-}
-
-static inline uint32_t SkPackPMColor_as_RGBA(SkPMColor c) {
-    return SkPackARGB_as_RGBA(SkGetPackedA32(c), SkGetPackedR32(c),
-                              SkGetPackedG32(c), SkGetPackedB32(c));
-}
-
-static inline uint32_t SkPackPMColor_as_BGRA(SkPMColor c) {
-    return SkPackARGB_as_BGRA(SkGetPackedA32(c), SkGetPackedR32(c),
-                              SkGetPackedG32(c), SkGetPackedB32(c));
-}
-
 /**
  *  Abstract 4-byte interpolation, implemented on top of SkPMColor
  *  utility functions.
Third parameter controls blending of the first two: diff --git a/src/opts/SkBlitMask_opts_arm_neon.cpp b/src/opts/SkBlitMask_opts_arm_neon.cpp index a32392906e0c2..37c85eeed6dc1 100644 --- a/src/opts/SkBlitMask_opts_arm_neon.cpp +++ b/src/opts/SkBlitMask_opts_arm_neon.cpp @@ -125,77 +125,3 @@ void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); } } - -#define LOAD_LANE_16(reg, n) \ - reg = vld1q_lane_u16(device, reg, n); \ - device = (uint16_t*)((char*)device + deviceRB); - -#define STORE_LANE_16(reg, n) \ - vst1_lane_u16(dst, reg, n); \ - dst = (uint16_t*)((char*)dst + deviceRB); - -void SkRGB16BlitterBlitV_neon(uint16_t* device, - int height, - size_t deviceRB, - unsigned scale, - uint32_t src32) { - if (height >= 8) - { - uint16_t* dst = device; - - // prepare constants - uint16x8_t vdev = vdupq_n_u16(0); - uint16x8_t vmaskq_g16 = vdupq_n_u16(SK_G16_MASK_IN_PLACE); - uint16x8_t vmaskq_ng16 = vdupq_n_u16(~SK_G16_MASK_IN_PLACE); - uint32x4_t vsrc32 = vdupq_n_u32(src32); - uint32x4_t vscale5 = vdupq_n_u32((uint32_t)scale); - - while (height >= 8){ - LOAD_LANE_16(vdev, 0) - LOAD_LANE_16(vdev, 1) - LOAD_LANE_16(vdev, 2) - LOAD_LANE_16(vdev, 3) - LOAD_LANE_16(vdev, 4) - LOAD_LANE_16(vdev, 5) - LOAD_LANE_16(vdev, 6) - LOAD_LANE_16(vdev, 7) - - // Expand_rgb_16 - uint16x8x2_t vdst = vzipq_u16((vdev & vmaskq_ng16), (vdev & vmaskq_g16)); - uint32x4_t vdst32_lo = vmulq_u32(vreinterpretq_u32_u16(vdst.val[0]), vscale5); - uint32x4_t vdst32_hi = vmulq_u32(vreinterpretq_u32_u16(vdst.val[1]), vscale5); - - // Compact_rgb_16 - vdst32_lo = vaddq_u32(vdst32_lo, vsrc32); - vdst32_hi = vaddq_u32(vdst32_hi, vsrc32); - vdst32_lo = vshrq_n_u32(vdst32_lo, 5); - vdst32_hi = vshrq_n_u32(vdst32_hi, 5); - - uint16x4_t vtmp_lo = vmovn_u32(vdst32_lo) & vget_low_u16(vmaskq_ng16); - uint16x4_t vtmp_hi = vshrn_n_u32(vdst32_lo, 16) & vget_low_u16(vmaskq_g16); - uint16x4_t vdst16_lo = vorr_u16(vtmp_lo, vtmp_hi); - vtmp_lo = vmovn_u32(vdst32_hi) & vget_low_u16(vmaskq_ng16); - vtmp_hi = vshrn_n_u32(vdst32_hi, 16) & vget_low_u16(vmaskq_g16); - uint16x4_t vdst16_hi = vorr_u16(vtmp_lo, vtmp_hi); - - STORE_LANE_16(vdst16_lo, 0) - STORE_LANE_16(vdst16_lo, 1) - STORE_LANE_16(vdst16_lo, 2) - STORE_LANE_16(vdst16_lo, 3) - STORE_LANE_16(vdst16_hi, 0) - STORE_LANE_16(vdst16_hi, 1) - STORE_LANE_16(vdst16_hi, 2) - STORE_LANE_16(vdst16_hi, 3) - height -= 8; - } - } - while (height != 0){ - uint32_t dst32 = SkExpand_rgb_16(*device) * scale; - *device = SkCompact_rgb_16((src32 + dst32) >> 5); - device = (uint16_t*)((char*)device + deviceRB); - height--; - } -} - -#undef LOAD_LANE_16 -#undef STORE_LANE_16
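-- 
For context on what the deleted SkColorData.h helpers did: they all rely on one trick for
RGB565 pixels, shifting the green channel 16 bits above red and blue so that a single
32-bit multiply by a 0..32 scale cannot carry between channels. The sketch below is not
Skia code; it is a minimal standalone illustration of that technique, using stand-in names
(kG16MaskInPlace, expand_rgb_16, compact_rgb_16, blend_rgb_16) for SK_G16_MASK_IN_PLACE,
SkExpand_rgb_16, SkCompact_rgb_16, and the removed scalar SkBlendRGB16.

#include <cstdint>
#include <cstdio>

// Stand-in for SK_G16_MASK_IN_PLACE: the 6-bit green field of a 565 pixel,
// in place at bits 5..10.
constexpr uint32_t kG16MaskInPlace = 0x07E0;

// Spread green 16 bits above red/blue so one 32-bit multiply by a 0..32
// scale cannot carry between channels (what SkExpand_rgb_16 did).
static inline uint32_t expand_rgb_16(uint16_t c) {
    return ((c & kG16MaskInPlace) << 16) | (c & ~kG16MaskInPlace);
}

// Fold an expanded value back into 16 bits (what SkCompact_rgb_16 did);
// the cast discards the high bits the original left as "garbage".
static inline uint16_t compact_rgb_16(uint32_t c) {
    return static_cast<uint16_t>(((c >> 16) & kG16MaskInPlace) | (c & ~kG16MaskInPlace));
}

// Lerp dst toward src by srcScale in [0,256], mirroring the removed scalar
// SkBlendRGB16: reduce the scale to 5 bits, blend in expanded form, compact.
static inline uint16_t blend_rgb_16(uint16_t src, uint16_t dst, int srcScale) {
    srcScale >>= 3;  // 0..256 -> 0..32
    uint32_t src32 = expand_rgb_16(src);
    uint32_t dst32 = expand_rgb_16(dst);
    return compact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
}

int main() {
    uint16_t red   = 0xF800;  // pure red in 565
    uint16_t green = 0x07E0;  // pure green in 565
    std::printf("50%% blend: 0x%04X\n", (unsigned)blend_rgb_16(red, green, 128));
    return 0;
}

The NEON blitter removed from SkBlitMask_opts_arm_neon.cpp vectorized the same
expand/scale/compact sequence eight rows at a time; since nothing calls it any more,
the scalar form above only documents the bit layout being retired.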