-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


[] ARM

, 09 2017 . 18:33 +
, , . ASCII () (\r \n). , 32.

, , 128- (SSE4). 5-10 .

, 128- , x64. ARM , x64?

:

/* Scalar reference: compact bytes[0..howmany) in place, keeping only the
 * bytes whose value is above 32; pos ends up as the number of bytes kept. */
size_t i = 0, pos = 0;
while (i < howmany) {
    char c = bytes[i++];
    /* Always store, then advance pos only for kept bytes, so the loop body
     * has no data-dependent branch. */
    bytes[pos] = c;
    /* Compare as unsigned: plain char may be signed, in which case bytes
     * >= 0x80 would compare below 32 and be dropped, unlike the unsigned
     * vcleq_u8 comparison used by the NEON is_white(). */
    pos += ((unsigned char)c > 32 ? 1 : 0);
}

32 . .

?

x64 16 , , ( bitset), 16 , , , . x64, (movemask). ARM . movemask .

, ARM , x86. ?

SSE4, , 32, :

/*
 * Build a per-lane whitespace mask for 16 bytes: each output lane is the
 * result of the unsigned comparison "byte <= ' '" (all bits set when true,
 * zero when false).
 */
static inline uint8x16_t is_white(uint8x16_t data) {
  return vcleq_u8(data, vdupq_n_u8(' '));
}

16 , , :

/*
 * Collapse a 128-bit vector into a 64-bit scalar that is nonzero if and only
 * if at least one bit of v is set. The exact value is not meaningful; callers
 * should only test it against zero.
 */
static inline uint64_t is_not_zero(uint8x16_t v) {
  uint64x2_t v64 = vreinterpretq_u64_u8(v);
  /* Saturating narrow 64 -> 32 bits: a nonzero 64-bit half can never narrow
   * to zero, so no set bit is lost. */
  uint32x2_t v32 = vqmovn_u64(v64);
  uint64x1_t result = vreinterpret_u64_u32(v32);
  /* Extract the lane with the intrinsic instead of the GCC/Clang vector
   * subscript extension (result[0]), which not every compiler accepts. */
  return vget_lane_u64(result, 0);
}

. , 16 . , 16 . , , :

/* One 16-byte step of the NEON despace loop: load a block, and either copy
 * it whole (no whitespace found) or fall back to a byte-by-byte compaction. */
uint8x16_t vecbytes = vld1q_u8((uint8_t *)bytes + i);
uint8x16_t w = is_white(vecbytes);
uint64_t haswhite = is_not_zero(w);
/* w is 0xFF on whitespace lanes and 0 elsewhere; adding justone (expected to
 * hold 1 in every lane) wraps that to 0 for whitespace and 1 for kept bytes,
 * i.e. the per-byte advance of pos. */
w0 = vaddq_u8(justone, w);
if (!haswhite) {
    /* Fast path: nothing to drop, move all 16 bytes with one store. */
    vst1q_u8((uint8_t *)bytes + pos, vecbytes);
    pos += 16;
    i += 16;
} else {
    for (int k = 0; k < 16; k++) {
        bytes[pos] = bytes[i++];
        /* Advance by the 0/1 keep flag. The original used w[k] here, which
         * is 0xFF for whitespace and 0 for kept bytes: the wrong amount and
         * the inverse of what is needed. */
        pos += w0[k];
    }
}

, 16 . , .

, , , , , . , ARM. 64- ARM- ( A57). . , Nintendo Switch.

1,40
NEON 1,04

. 1,7, , perf stat. :

ARM x64
2,4 1,2
(NEON SSE4) 1,8 0,25

, x64 - 1,2 , ARM . , A57 x64 . SSE4 x64 0,25 , , ARM NEON.

. x64 movemask/pshufb . ARM NEON .

ARM . , x86/x64. ARM NEON , SSE/AVX. movemask .

, , ARM NEON , ?

. : , 64- ARM 16 .
Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/332710/

:  

: [1] []
 

:
: 

: ( )

:

  URL