请问 SpacemiT K1 全面支持 RVV 1.0 吗?例如 vrgather 指令,我这边显示非法指令。如果不支持的话,那么 SpacemiT K3 支持不?
K1支持的。
让AI写了个测试
/*
* vrgather RVV 1.0 指令测试程序
* 编译命令:
* riscv64-unknown-linux-gnu-gcc -O1 -march=rv64gcv -mabi=lp64d -o vrgather_test vrgather_test.c
* 或 (如果工具链支持):
* gcc -O1 -march=rv64gcv -mabi=lp64d -o vrgather_test vrgather_test.c
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
/* -------------------------------------------------------
* 测试1: vrgather.vv
* 用索引向量从源向量中 gather 元素
* src = [10, 20, 30, 40, 50, 60, 70, 80]
* idx = [ 3, 0, 2, 7, 1, 5, 4, 6]
* dst = [40, 10, 30, 80, 20, 60, 50, 70]
* ------------------------------------------------------- */
static int test_vrgather_vv(void)
{
uint32_t src[8] = {10, 20, 30, 40, 50, 60, 70, 80};
uint32_t idx[8] = { 3, 0, 2, 7, 1, 5, 4, 6};
uint32_t dst[8] = {0};
uint32_t expected[8] = {40, 10, 30, 80, 20, 60, 50, 70};
__asm__ volatile (
"vsetvli t0, %[n], e32, m1, ta, ma\n\t"
"vle32.v v0, (%[src])\n\t" /* v0 = src */
"vle32.v v1, (%[idx])\n\t" /* v1 = idx */
"vrgather.vv v2, v0, v1\n\t" /* v2 = src[idx] */
"vse32.v v2, (%[dst])\n\t"
:
: [src] "r"(src), [idx] "r"(idx), [dst] "r"(dst),
[n] "r"((unsigned long)8)
: "t0", "memory"
);
int pass = (memcmp(dst, expected, sizeof(dst)) == 0);
printf("[vrgather.vv] %s\n", pass ? "PASS" : "FAIL");
if (!pass) {
printf(" got: ");
for (int i = 0; i < 8; i++) printf("%3u ", dst[i]);
printf("\n expected: ");
for (int i = 0; i < 8; i++) printf("%3u ", expected[i]);
printf("\n");
}
return pass;
}
/* -------------------------------------------------------
* 测试2: vrgather.vx
* 用单个标量寄存器作为广播索引,相当于 vector splat
* src = [10, 20, 30, 40, 50, 60, 70, 80]
* index = 5 (scalar)
* dst = [60, 60, 60, 60, 60, 60, 60, 60]
* ------------------------------------------------------- */
static int test_vrgather_vx(void)
{
uint32_t src[8] = {10, 20, 30, 40, 50, 60, 70, 80};
uint32_t dst[8] = {0};
unsigned long scalar_idx = 5;
uint32_t expected[8] = {60, 60, 60, 60, 60, 60, 60, 60};
__asm__ volatile (
"vsetvli t0, %[n], e32, m1, ta, ma\n\t"
"vle32.v v0, (%[src])\n\t"
"vrgather.vx v1, v0, %[sidx]\n\t"
"vse32.v v1, (%[dst])\n\t"
:
: [src] "r"(src), [dst] "r"(dst),
[sidx] "r"(scalar_idx), [n] "r"((unsigned long)8)
: "t0", "memory"
);
int pass = (memcmp(dst, expected, sizeof(dst)) == 0);
printf("[vrgather.vx] %s\n", pass ? "PASS" : "FAIL");
if (!pass) {
printf(" got: ");
for (int i = 0; i < 8; i++) printf("%3u ", dst[i]);
printf("\n expected: ");
for (int i = 0; i < 8; i++) printf("%3u ", expected[i]);
printf("\n");
}
return pass;
}
/* -------------------------------------------------------
* 测试3: vrgather.vi
* 立即数索引(0‒31),广播 src[2]
* src = [10, 20, 30, 40, 50, 60, 70, 80]
* dst = [30, 30, 30, 30, 30, 30, 30, 30]
* ------------------------------------------------------- */
static int test_vrgather_vi(void)
{
uint32_t src[8] = {10, 20, 30, 40, 50, 60, 70, 80};
uint32_t dst[8] = {0};
uint32_t expected[8] = {30, 30, 30, 30, 30, 30, 30, 30};
__asm__ volatile (
"vsetvli t0, %[n], e32, m1, ta, ma\n\t"
"vle32.v v0, (%[src])\n\t"
"vrgather.vi v1, v0, 2\n\t" /* 立即数固定为 2 */
"vse32.v v1, (%[dst])\n\t"
:
: [src] "r"(src), [dst] "r"(dst), [n] "r"((unsigned long)8)
: "t0", "memory"
);
int pass = (memcmp(dst, expected, sizeof(dst)) == 0);
printf("[vrgather.vi] %s\n", pass ? "PASS" : "FAIL");
if (!pass) {
printf(" got: ");
for (int i = 0; i < 8; i++) printf("%3u ", dst[i]);
printf("\n expected: ");
for (int i = 0; i < 8; i++) printf("%3u ", expected[i]);
printf("\n");
}
return pass;
}
/* -------------------------------------------------------
* 测试4: vrgatherei16.vv
* 使用 16-bit 索引向量,适合 SEW > 16 时节省索引空间
* src = [100, 200, 300, 400] (e32)
* idx = [3, 1, 0, 2] (e16)
* dst = [400, 200, 100, 300]
* ------------------------------------------------------- */
static int test_vrgatherei16(void)
{
uint32_t src[4] = {100, 200, 300, 400};
uint16_t idx[4] = { 3, 1, 0, 2};
uint32_t dst[4] = {0};
uint32_t expected[4] = {400, 200, 100, 300};
__asm__ volatile (
"vsetvli t0, %[n], e32, m1, ta, ma\n\t"
"vle32.v v0, (%[src])\n\t"
/* 切换到 e16 加载索引,再切回 e32 */
"vsetvli t0, %[n], e16, m1, ta, ma\n\t"
"vle16.v v1, (%[idx])\n\t"
"vsetvli t0, %[n], e32, m1, ta, ma\n\t"
"vrgatherei16.vv v2, v0, v1\n\t"
"vse32.v v2, (%[dst])\n\t"
:
: [src] "r"(src), [idx] "r"(idx), [dst] "r"(dst),
[n] "r"((unsigned long)4)
: "t0", "memory"
);
int pass = (memcmp(dst, expected, sizeof(dst)) == 0);
printf("[vrgatherei16] %s\n", pass ? "PASS" : "FAIL");
if (!pass) {
printf(" got: ");
for (int i = 0; i < 4; i++) printf("%4u ", dst[i]);
printf("\n expected: ");
for (int i = 0; i < 4; i++) printf("%4u ", expected[i]);
printf("\n");
}
return pass;
}
int main(void)
{
printf("=== RVV 1.0 vrgather 指令测试 ===\n\n");
int total = 0, passed = 0;
passed += test_vrgather_vv(); total++;
passed += test_vrgather_vx(); total++;
passed += test_vrgather_vi(); total++;
passed += test_vrgatherei16(); total++;
printf("\n结果: %d / %d 通过\n", passed, total);
return (passed == total) ? 0 : 1;
}
K1上的结果如下
root@K1:~# gcc -march=rv64gcv -o vrgather_test vrgather_test.c
root@K1:~# objdump -d vrgather_test | grep "vrgather\.v"
930: 32008157 vrgather.vv v2,v0,v1
ae8: 3206c0d7 vrgather.vx v1,v0,a3
c96: 320130d7 vrgather.vi v1,v0,2
root@K1:~# ./vrgather_test
=== RVV 1.0 vrgather 指令测试 ===
[vrgather.vv] PASS
[vrgather.vx] PASS
[vrgather.vi] PASS
[vrgatherei16] PASS
结果: 4 / 4 通过