Fork me on GitHub
Nothing Special   »   [go: up one dir, main page]

src/arraymancer/laser/simd

Search:
Group by:
  Source Edit

Types

m128 {.importc: "__m128", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m128d {.importc: "__m128d", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m128i {.importc: "__m128i", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m256 {.importc: "__m256", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m256d {.importc: "__m256d", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m256i {.importc: "__m256i", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m512 {.importc: "__m512", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m512d {.importc: "__m512d", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
m512i {.importc: "__m512i", bycopy, header: "<x86intrin.h>".} = object
  Source Edit
mmask16 {.importc: "__mmask16", bycopy, header: "<x86intrin.h>".} = distinct uint16
  Source Edit
mmask64 {.importc: "__mmask64", bycopy, header: "<x86intrin.h>".} = distinct uint64
  Source Edit

Procs

func cvtmask64_u64(a: mmask64): uint64 {.importc: "_cvtmask64_u64", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_add_epi8(a, b: m256i): m256i {.importc: "_mm256_add_epi8", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_add_epi16(a, b: m256i): m256i {.importc: "_mm256_add_epi16", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_add_epi32(a, b: m256i): m256i {.importc: "_mm256_add_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_add_epi64(a, b: m256i): m256i {.importc: "_mm256_add_epi64", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_add_pd(a, b: m256d): m256d {.importc: "_mm256_add_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm256_add_ps(a, b: m256): m256 {.importc: "_mm256_add_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm256_and_ps(a, b: m256): m256 {.importc: "_mm256_and_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Bitwise and   Source Edit
func mm256_and_si256(a, b: m256i): m256i {.importc: "_mm256_and_si256", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Bitwise and   Source Edit
func mm256_castps256_ps128(a: m256): m128 {.importc: "_mm256_castps256_ps128",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Returns the lower part of a m256 in a m128   Source Edit
func mm256_castps_si256(a: m256): m256i {.importc: "_mm256_castps_si256",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Cast a float32x8 vector into a 256-bit int vector with the same bit pattern   Source Edit
func mm256_castsi256_ps(a: m256i): m256 {.importc: "_mm256_castsi256_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Cast a 256-bit int vector into a float32x8 vector with the same bit pattern   Source Edit
func mm256_cmpgt_epi32(a, b: m256i): m256i {.importc: "_mm256_cmpgt_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Compare a greater than b   Source Edit
func mm256_cvtepi32_ps(a: m256i): m256 {.importc: "_mm256_cvtepi32_ps", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Convert a int32x8 to float32x8   Source Edit
func mm256_cvtps_epi32(a: m256): m256i {.importc: "_mm256_cvtps_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Convert a float32x8 to int32x8   Source Edit
func mm256_extractf128_ps(v: m256; m: cint{lit}): m128 {.
    importc: "_mm256_extractf128_ps", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
Extracts the low part (m = 0) or high part (m = 1) of a m256 into a m128. m must be a literal.   Source Edit
func mm256_fmadd_pd(a, b, c: m256d): m256d {.importc: "_mm256_fmadd_pd", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_fmadd_ps(a, b, c: m256): m256 {.importc: "_mm256_fmadd_ps", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_i32gather_epi32(m: ptr (uint32 or int32); i: m256i; s: int32): m256i {.
    importc: "_mm256_i32gather_epi32", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_load_pd(aligned_mem_addr: ptr float64): m256d {.
    importc: "_mm256_load_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_load_ps(aligned_mem_addr: ptr float32): m256 {.
    importc: "_mm256_load_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_load_si256(mem_addr: ptr m256i): m256i {.
    importc: "_mm256_load_si256", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_loadu_pd(mem_addr: ptr float64): m256d {.importc: "_mm256_loadu_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_loadu_ps(mem_addr: ptr float32): m256 {.importc: "_mm256_loadu_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_loadu_si256(mem_addr: ptr m256i): m256i {.
    importc: "_mm256_loadu_si256", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_max_ps(a, b: m256): m256 {.importc: "_mm256_max_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm256_min_ps(a, b: m256): m256 {.importc: "_mm256_min_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm256_movemask_epi8(a: m256i): int32 {.importc: "_mm256_movemask_epi8",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Returns the most significant bit of each 8-bit element in a   Source Edit
func mm256_mul_epu32(a: m256i; b: m256i): m256i {.importc: "_mm256_mul_epu32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}

From a = {a3_hi, a3_lo, a2_hi, a2_lo, a1_hi, a1_lo, a0_hi, a0_lo} with a3, a2, a1, a0 being 64-bit numbers and b = {b3_hi, b3_lo, b2_hi, b2_lo, b1_hi, b1_lo, b0_hi, b0_lo}

Result = {a3_lo * b3_lo, a2_lo * b2_lo, a1_lo * b1_lo, a0_lo * b0_lo}. This is an extended precision multiplication 32x32 -> 64

  Source Edit
func mm256_mul_pd(a, b: m256d): m256d {.importc: "_mm256_mul_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm256_mul_ps(a, b: m256): m256 {.importc: "_mm256_mul_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm256_mullo_epi16(a, b: m256i): m256i {.importc: "_mm256_mullo_epi16",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 16 16-bit ints into intermediate 16 32-bit ints, and keep the low 16-bit parts   Source Edit
func mm256_mullo_epi32(a, b: m256i): m256i {.importc: "_mm256_mullo_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 8x 32-bit ints into intermediate 8x 64-bit ints, and keep the low 32-bit parts   Source Edit
func mm256_or_ps(a, b: m256): m256 {.importc: "_mm256_or_ps", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm256_set1_epi8(a: int8 or uint8): m256i {.importc: "_mm256_set1_epi8",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_set1_epi16(a: int16 or uint16): m256i {.importc: "_mm256_set1_epi16",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_set1_epi32(a: int32 or uint32): m256i {.importc: "_mm256_set1_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm256_set1_epi64x(a: int64 or uint64): m256i {.
    importc: "_mm256_set1_epi64x", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_set1_pd(a: float64): m256d {.importc: "_mm256_set1_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm256_set1_ps(a: float32): m256 {.importc: "_mm256_set1_ps", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
  Source Edit
func mm256_setzero_pd(): m256d {.importc: "_mm256_setzero_pd", nodecl,
                                 header: "<x86intrin.h>", ...raises: [], tags: [],
                                 forbids: [].}
  Source Edit
func mm256_setzero_ps(): m256 {.importc: "_mm256_setzero_ps", nodecl,
                                header: "<x86intrin.h>", ...raises: [], tags: [],
                                forbids: [].}
  Source Edit
func mm256_setzero_si256(): m256i {.importc: "_mm256_setzero_si256", nodecl,
                                    header: "<x86intrin.h>", ...raises: [],
                                    tags: [], forbids: [].}
  Source Edit
func mm256_shuffle_epi32(a: m256i; imm8: cint): m256i {.
    importc: "_mm256_shuffle_epi32", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
Shuffle 32-bit integers in a according to the control in imm8. Formula is in big endian representation: a = {hi[a7, a6, a5, a4], lo[a3, a2, a1, a0]}, dst = {d7, d6, d5, d4, d3, d2, d1, d0}, imm8 = {bits[7:6], bits[5:4], bits[3:2], bits[1:0]}. d0 will refer to a.lo[bits[1:0]], d1 to a.lo[bits[3:2]], ..., d4 will refer to a.hi[bits[1:0]], d5 to a.hi[bits[3:2]].   Source Edit
func mm256_slli_epi32(a: m256i; count: int32): m256i {.
    importc: "_mm256_slli_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_srli_epi32(a: m256i; count: int32): m256i {.
    importc: "_mm256_srli_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_srli_epi64(a: m256i; imm8: cint): m256i {.
    importc: "_mm256_srli_epi64", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
Logical right shift   Source Edit
func mm256_store_pd(mem_addr: ptr float64; a: m256d) {.
    importc: "_mm256_store_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_store_ps(mem_addr: ptr float32; a: m256) {.
    importc: "_mm256_store_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_storeu_pd(mem_addr: ptr float64; a: m256d) {.
    importc: "_mm256_storeu_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_storeu_ps(mem_addr: ptr float32; a: m256) {.
    importc: "_mm256_storeu_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_storeu_si256(mem_addr: ptr m256i; a: m256i) {.
    importc: "_mm256_storeu_si256", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm256_sub_ps(a, b: m256): m256 {.importc: "_mm256_sub_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm512_add_epi8(a, b: m512i): m512i {.importc: "_mm512_add_epi8", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_add_epi16(a, b: m512i): m512i {.importc: "_mm512_add_epi16", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_add_epi32(a, b: m512i): m512i {.importc: "_mm512_add_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_add_epi64(a, b: m512i): m512i {.importc: "_mm512_add_epi64", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_add_pd(a, b: m512d): m512d {.importc: "_mm512_add_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm512_add_ps(a, b: m512): m512 {.importc: "_mm512_add_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm512_and_si512(a, b: m512i): m512i {.importc: "_mm512_and_si512", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Bitwise and   Source Edit
func mm512_castps_si512(a: m512): m512i {.importc: "_mm512_castps_si512",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Cast a float32x16 vector into a 512-bit int vector with the same bit pattern   Source Edit
func mm512_castsi512_ps(a: m512i): m512 {.importc: "_mm512_castsi512_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Cast a 512-bit int vector into a float32x16 vector with the same bit pattern   Source Edit
func mm512_cmpgt_epi32_mask(a, b: m512i): mmask16 {.
    importc: "_mm512_cmpgt_epi32_mask", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
Compare a greater than b, returns a 16-bit mask   Source Edit
func mm512_cvtepi32_ps(a: m512i): m512 {.importc: "_mm512_cvtepi32_ps", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Convert an int32x16 to float32x16   Source Edit
func mm512_cvtps_epi32(a: m512): m512i {.importc: "_mm512_cvtps_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Convert a float32x16 to int32x16   Source Edit
func mm512_fmadd_pd(a, b, c: m512d): m512d {.importc: "_mm512_fmadd_pd", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_fmadd_ps(a, b, c: m512): m512 {.importc: "_mm512_fmadd_ps", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_i32gather_epi32(i: m512i; m: ptr (uint32 or int32); s: int32): m512i {.
    importc: "_mm512_i32gather_epi32", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
 Warning ⚠: Arguments are switched compared to mm256_i32gather_epi32   Source Edit
func mm512_load_pd(aligned_mem_addr: ptr float64): m512d {.
    importc: "_mm512_load_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_load_ps(aligned_mem_addr: ptr float32): m512 {.
    importc: "_mm512_load_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_load_si512(mem_addr: ptr SomeInteger): m512i {.
    importc: "_mm512_load_si512", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_loadu_pd(mem_addr: ptr float64): m512d {.importc: "_mm512_loadu_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_loadu_ps(mem_addr: ptr float32): m512 {.importc: "_mm512_loadu_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_loadu_si512(mem_addr: ptr SomeInteger): m512i {.
    importc: "_mm512_loadu_si512", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_maskz_set1_epi32(k: mmask16; a: cint): m512i {.
    importc: "_mm512_maskz_set1_epi32", nodecl, header: "<x86intrin.h>",
    ...raises: [], tags: [], forbids: [].}
Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).   Source Edit
func mm512_max_ps(a, b: m512): m512 {.importc: "_mm512_max_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm512_min_ps(a, b: m512): m512 {.importc: "_mm512_min_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm512_movepi8_mask(a: m512i): mmask64 {.importc: "_mm512_movepi8_mask",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Returns the most significant bit of each 8-bit element in a   Source Edit
func mm512_movm_epi32(a: mmask16): m512i {.importc: "_mm512_movm_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_mul_pd(a, b: m512d): m512d {.importc: "_mm512_mul_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm512_mul_ps(a, b: m512): m512 {.importc: "_mm512_mul_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm512_mullo_epi32(a, b: m512i): m512i {.importc: "_mm512_mullo_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 16 32-bit ints into intermediate 16 64-bit ints, and keep the low 32-bit parts   Source Edit
func mm512_mullo_epi64(a, b: m512i): m512i {.importc: "_mm512_mullo_epi64",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 8x 64-bit ints into intermediate 8x 128-bit ints, and keep the low 64-bit parts   Source Edit
func mm512_or_ps(a, b: m512): m512 {.importc: "_mm512_or_ps", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm512_set1_epi8(a: int8 or uint8): m512i {.importc: "_mm512_set1_epi8",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_set1_epi16(a: int16 or uint16): m512i {.importc: "_mm512_set1_epi16",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_set1_epi32(a: int32 or uint32): m512i {.importc: "_mm512_set1_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_set1_epi64(a: int64 or uint64): m512i {.importc: "_mm512_set1_epi64",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm512_set1_pd(a: float64): m512d {.importc: "_mm512_set1_pd", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm512_set1_ps(a: float32): m512 {.importc: "_mm512_set1_ps", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
  Source Edit
func mm512_setzero_pd(): m512d {.importc: "_mm512_setzero_pd", nodecl,
                                 header: "<x86intrin.h>", ...raises: [], tags: [],
                                 forbids: [].}
  Source Edit
func mm512_setzero_ps(): m512 {.importc: "_mm512_setzero_ps", nodecl,
                                header: "<x86intrin.h>", ...raises: [], tags: [],
                                forbids: [].}
  Source Edit
func mm512_setzero_si512(): m512i {.importc: "_mm512_setzero_si512", nodecl,
                                    header: "<x86intrin.h>", ...raises: [],
                                    tags: [], forbids: [].}
  Source Edit
func mm512_slli_epi32(a: m512i; count: int32): m512i {.
    importc: "_mm512_slli_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_srli_epi32(a: m512i; count: int32): m512i {.
    importc: "_mm512_srli_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_store_pd(mem_addr: ptr float64; a: m512d) {.
    importc: "_mm512_store_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_store_ps(mem_addr: ptr float32; a: m512) {.
    importc: "_mm512_store_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_storeu_pd(mem_addr: ptr float64; a: m512d) {.
    importc: "_mm512_storeu_pd", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_storeu_ps(mem_addr: ptr float32; a: m512) {.
    importc: "_mm512_storeu_ps", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_storeu_si512(mem_addr: ptr SomeInteger; a: m512i) {.
    importc: "_mm512_storeu_si512", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm512_sub_ps(a, b: m512): m512 {.importc: "_mm512_sub_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
  Source Edit
func mm_add_epi8(a, b: m128i): m128i {.importc: "_mm_add_epi8", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
  Source Edit
func mm_add_epi16(a, b: m128i): m128i {.importc: "_mm_add_epi16", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm_add_epi32(a, b: m128i): m128i {.importc: "_mm_add_epi32", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm_add_epi64(a, b: m128i): m128i {.importc: "_mm_add_epi64", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm_add_pd(a, b: m128d): m128d {.importc: "_mm_add_pd", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm_add_ps(a, b: m128): m128 {.importc: "_mm_add_ps", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_add_ss(a, b: m128): m128 {.importc: "_mm_add_ss", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_and_si128(a, b: m128i): m128i {.importc: "_mm_and_si128", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
  Source Edit
func mm_castps_si128(a: m128): m128i {.importc: "_mm_castps_si128", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
Cast a float32x4 vector into a 128-bit int vector with the same bit pattern   Source Edit
func mm_castsi128_ps(a: m128i): m128 {.importc: "_mm_castsi128_ps", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
Cast a 128-bit int vector into a float32x4 vector with the same bit pattern   Source Edit
func mm_cmpgt_epi32(a, b: m128i): m128i {.importc: "_mm_cmpgt_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Compare a greater than b   Source Edit
func mm_cvtepi32_ps(a: m128i): m128 {.importc: "_mm_cvtepi32_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Convert a int32x4 to float32x4   Source Edit
func mm_cvtps_epi32(a: m128): m128i {.importc: "_mm_cvtps_epi32", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Convert a float32x4 to int32x4   Source Edit
func mm_cvtsi128_si32(a: m128i): cint {.importc: "_mm_cvtsi128_si32", nodecl,
                                        header: "<x86intrin.h>", ...raises: [],
                                        tags: [], forbids: [].}
Copy the low part of a to int32   Source Edit
func mm_cvtss_f32(a: m128): float32 {.importc: "_mm_cvtss_f32", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Extract the low part of the input. Input: { A0, A1, A2, A3 }. Result: A0   Source Edit
func mm_extract_epi16(a: m128i; imm8: cint): cint {.
    importc: "_mm_extract_epi16", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
Extract an int16 from a, selected with imm8, and store it in the lower part of destination (padded with zeroes)   Source Edit
func mm_i32gather_epi32(m: ptr (uint32 or int32); i: m128i; s: int32): m128i {.
    importc: "_mm_i32gather_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm_load_pd(aligned_mem_addr: ptr float64): m128d {.importc: "_mm_load_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_load_ps(aligned_mem_addr: ptr float32): m128 {.importc: "_mm_load_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_load_si128(mem_addr: ptr m128i): m128i {.importc: "_mm_load_si128",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_load_ss(aligned_mem_addr: ptr float32): m128 {.importc: "_mm_load_ss",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_loadu_pd(mem_addr: ptr float64): m128d {.importc: "_mm_loadu_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_loadu_ps(data: ptr float32): m128 {.importc: "_mm_loadu_ps", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_loadu_si128(mem_addr: ptr m128i): m128i {.importc: "_mm_loadu_si128",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_max_ps(a, b: m128): m128 {.importc: "_mm_max_ps", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_max_ss(a, b: m128): m128 {.importc: "_mm_max_ss", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_min_ps(a, b: m128): m128 {.importc: "_mm_min_ps", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_min_ss(a, b: m128): m128 {.importc: "_mm_min_ss", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_movehdup_ps(a: m128): m128 {.importc: "_mm_movehdup_ps", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
Duplicates high parts of the input. Input: { A0, A1, A2, A3 }. Result: { A1, A1, A3, A3 }   Source Edit
func mm_movehl_ps(a, b: m128): m128 {.importc: "_mm_movehl_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Input: { A0, A1, A2, A3 }, { B0, B1, B2, B3 } Result: { B2, B3, A2, A3 }   Source Edit
func mm_moveldup_ps(a: m128): m128 {.importc: "_mm_moveldup_ps", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
Duplicates low parts of the input. Input: { A0, A1, A2, A3 }. Result: { A0, A0, A2, A2 }   Source Edit
func mm_movelh_ps(a, b: m128): m128 {.importc: "_mm_movelh_ps", nodecl,
                                      header: "<x86intrin.h>", ...raises: [],
                                      tags: [], forbids: [].}
Input: { A0, A1, A2, A3 }, { B0, B1, B2, B3 } Result: { A0, A1, B0, B1 }   Source Edit
func mm_movemask_epi8(a: m128i): int32 {.importc: "_mm_movemask_epi8", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Returns the most significant bit of each 8-bit element in a   Source Edit
func mm_mul_epu32(a: m128i; b: m128i): m128i {.importc: "_mm_mul_epu32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}

From a = {a1_hi, a1_lo, a0_hi, a0_lo} with a1 and a0 being 64-bit numbers and b = {b1_hi, b1_lo, b0_hi, b0_lo}

Result = {a1_lo * b1_lo, a0_lo * b0_lo}. This is an extended precision multiplication 32x32 -> 64

  Source Edit
func mm_mul_pd(a, b: m128d): m128d {.importc: "_mm_mul_pd", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm_mul_ps(a, b: m128): m128 {.importc: "_mm_mul_ps", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
func mm_mullo_epi16(a, b: m128i): m128i {.importc: "_mm_mullo_epi16", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 8 16-bit ints into intermediate 8 32-bit ints, and keep the low 16-bit parts   Source Edit
func mm_mullo_epi32(a, b: m128i): m128i {.importc: "_mm_mullo_epi32", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Multiply element-wise 2 vectors of 4 32-bit ints into intermediate 4 64-bit ints, and keep the low 32-bit parts   Source Edit
func mm_or_ps(a, b: m128): m128 {.importc: "_mm_or_ps", nodecl,
                                  header: "<x86intrin.h>", ...raises: [], tags: [],
                                  forbids: [].}
  Source Edit
func mm_or_si128(a, b: m128i): m128i {.importc: "_mm_or_si128", nodecl,
                                       header: "<x86intrin.h>", ...raises: [],
                                       tags: [], forbids: [].}
  Source Edit
func mm_set1_epi8(a: int8 or uint8): m128i {.importc: "_mm_set1_epi8", nodecl,
    header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_set1_epi16(a: int16 or uint16): m128i {.importc: "_mm_set1_epi16",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_set1_epi32(a: int32 or uint32): m128i {.importc: "_mm_set1_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_set1_epi64x(a: int64 or uint64): m128i {.importc: "_mm_set1_epi64x",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_set1_pd(a: float64): m128d {.importc: "_mm_set1_pd", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm_set1_ps(a: float32): m128 {.importc: "_mm_set1_ps", nodecl,
                                    header: "<x86intrin.h>", ...raises: [],
                                    tags: [], forbids: [].}
  Source Edit
func mm_set_epi32(e3, e2, e1, e0: cint): m128i {.importc: "_mm_set_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Initialize m128i with {e3, e2, e1, e0} (big endian order). Storing it will yield e0, e1, e2, e3.   Source Edit
func mm_setzero_pd(): m128d {.importc: "_mm_setzero_pd", nodecl,
                              header: "<x86intrin.h>", ...raises: [], tags: [],
                              forbids: [].}
  Source Edit
func mm_setzero_ps(): m128 {.importc: "_mm_setzero_ps", nodecl,
                             header: "<x86intrin.h>", ...raises: [], tags: [],
                             forbids: [].}
  Source Edit
func mm_setzero_si128(): m128i {.importc: "_mm_setzero_si128", nodecl,
                                 header: "<x86intrin.h>", ...raises: [], tags: [],
                                 forbids: [].}
  Source Edit
func mm_shuffle_epi32(a: m128i; imm8: cint): m128i {.
    importc: "_mm_shuffle_epi32", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
Shuffle 32-bit integers in a according to the control in imm8. Formula is in big endian representation: a = {a3, a2, a1, a0}, dst = {d3, d2, d1, d0}, imm8 = {bits[7:6], bits[5:4], bits[3:2], bits[1:0]}. d0 will refer to a[bits[1:0]], d1 to a[bits[3:2]], and so on.   Source Edit
func mm_slli_epi32(a: m128i; count: int32): m128i {.importc: "_mm_slli_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_slli_epi64(a: m128i; imm8: cint): m128i {.importc: "_mm_slli_epi64",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Shift 2xint64 left   Source Edit
func mm_srli_epi32(a: m128i; count: int32): m128i {.importc: "_mm_srli_epi32",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_srli_epi64(a: m128i; imm8: cint): m128i {.importc: "_mm_srli_epi64",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
Shift 2xint64 right   Source Edit
func mm_store_pd(mem_addr: ptr float64; a: m128d) {.importc: "_mm_store_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_store_ps(mem_addr: ptr float32; a: m128) {.importc: "_mm_store_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_storeu_pd(mem_addr: ptr float64; a: m128d) {.importc: "_mm_storeu_pd",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_storeu_ps(mem_addr: ptr float32; a: m128) {.importc: "_mm_storeu_ps",
    nodecl, header: "<x86intrin.h>", ...raises: [], tags: [], forbids: [].}
  Source Edit
func mm_storeu_si128(mem_addr: ptr m128i; a: m128i) {.
    importc: "_mm_storeu_si128", nodecl, header: "<x86intrin.h>", ...raises: [],
    tags: [], forbids: [].}
  Source Edit
func mm_sub_pd(a, b: m128d): m128d {.importc: "_mm_sub_pd", nodecl,
                                     header: "<x86intrin.h>", ...raises: [],
                                     tags: [], forbids: [].}
  Source Edit
func mm_sub_ps(a, b: m128): m128 {.importc: "_mm_sub_ps", nodecl,
                                   header: "<x86intrin.h>", ...raises: [],
                                   tags: [], forbids: [].}
  Source Edit
Arraymancer Technical reference Tutorial Spellbook (How-To's) Under the hood