Initial commit: SmartCar Framework v0.1 — 龙芯2K0300智能车开发框架\n\n- HAL: GPIO/PWM/Encoder/Framebuffer 驱动\n- Control: PID/IMU/Motor/Servo 控制\n- Vision: HSV双Otsu→4点标定IPM→洪泛填充→逐行搜线\n- Strategy: 三区前瞻偏差+速度策略\n- Debug: 文件热调参+LCD预览+cv截帧\n- Scheduler: 5ms timerfd+epoll 中央调度器
This commit is contained in:
128
bench/bench.cpp
Normal file
128
bench/bench.cpp
Normal file
@@ -0,0 +1,128 @@
|
||||
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
|
||||
|
||||
// Monotonic clock used for all interval timing. std::chrono::high_resolution_clock
// is permitted to be an alias of system_clock, which can jump when the wall
// clock is adjusted and would corrupt the measured durations.
using Clock = std::chrono::steady_clock;

// Volatile sink: one result element is folded into it after every repetition
// so the optimizer cannot treat the multiply as dead code.
static volatile int32_t g_sink = 0;

// Side length of the square backing buffers. Every benchmarked shape must
// satisfy M*K, K*N and M*N <= BLK*BLK.
static constexpr int BLK = 512;

// Operand matrices (int8) and the int32 result matrix, aligned to 64 bytes.
alignas(64) static int8_t  A[BLK * BLK];
alignas(64) static int8_t  B[BLK * BLK];
alignas(64) static int32_t C[BLK * BLK];
|
||||
|
||||
static void init()
|
||||
{
|
||||
for (int i = 0; i < BLK*BLK; ++i) {
|
||||
A[i] = (int8_t)((i * 13 + 7) % 127 - 63);
|
||||
B[i] = (int8_t)((i * 29 + 3) % 127 - 63);
|
||||
}
|
||||
}
|
||||
|
||||
// 运行足够 reps 使时长 >= target_sec
|
||||
static double bench(int M, int N, int K, double target_sec)
|
||||
{
|
||||
auto run = [&](int reps) {
|
||||
auto t0 = Clock::now();
|
||||
for (int r = 0; r < reps; ++r) {
|
||||
for (int i = 0; i < M; ++i) {
|
||||
int8_t* __restrict ar = A + i * K;
|
||||
int32_t* __restrict cr = C + i * N;
|
||||
for (int j = 0; j < N; ++j) {
|
||||
int32_t s = 0;
|
||||
for (int k = 0; k < K; ++k)
|
||||
s += (int32_t)ar[k] * (int32_t)B[k * N + j];
|
||||
cr[j] = s;
|
||||
}
|
||||
}
|
||||
g_sink += C[0];
|
||||
}
|
||||
auto t1 = Clock::now();
|
||||
return std::chrono::duration<double>(t1 - t0).count();
|
||||
};
|
||||
|
||||
// 短跑估计 rep
|
||||
int reps = 1;
|
||||
double t = run(reps);
|
||||
while (t < 0.1 && reps < 10000) { reps *= 2; t = run(reps); }
|
||||
|
||||
// 按 target_sec 缩放 reps
|
||||
int final_reps = (int)(reps * target_sec / t);
|
||||
if (final_reps < 1) final_reps = 1;
|
||||
|
||||
// 正式跑
|
||||
double t_real = run(final_reps);
|
||||
if (t_real < 0.5) {
|
||||
final_reps *= 10;
|
||||
t_real = run(final_reps);
|
||||
}
|
||||
|
||||
double ops = (double)M * N * K * final_reps * 2; // 1 MAC = 2 ops
|
||||
double gops = ops / t_real / 1e9;
|
||||
printf(" (%4dx%4dx%4d) x%6d rep %7.1f ms → %8.3f GOPS (%8.3f GMACS)\n",
|
||||
M, N, K, final_reps, t_real * 1000.0, gops, gops/2.0);
|
||||
return gops;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
printf("═══════════════════════════════════════════\n");
|
||||
printf(" INT8 算力持续压力测试 (每项 ~10s)\n");
|
||||
printf("═══════════════════════════════════════════\n\n");
|
||||
|
||||
init();
|
||||
// warmup
|
||||
bench(64, 64, 64, 1.0);
|
||||
|
||||
const double T = 10.0; // 每项跑 10 秒
|
||||
|
||||
struct { int m,n,k; const char* desc; } tests[] = {
|
||||
{ 16, 16, 16, "微型 16x16x16" },
|
||||
{ 32, 32, 32, "小阵 32x32x32" },
|
||||
{ 64, 64, 64, "中阵 64x64x64" },
|
||||
{128, 64, 64, "宽中 128x64x64" },
|
||||
{256, 64, 64, "宽大 256x64x64" },
|
||||
{128,128, 64, "大方 128x128x64" },
|
||||
{ 64,128, 64, "长条 64x128x64" },
|
||||
{ 64, 64,128, "深乘 64x64x128" },
|
||||
{ 32,128,128, "扁平 32x128x128" },
|
||||
};
|
||||
|
||||
double sum = 0; int cnt = 0;
|
||||
for (auto& t : tests)
|
||||
{
|
||||
double g = bench(t.m, t.n, t.k, T);
|
||||
sum += g; cnt++;
|
||||
}
|
||||
|
||||
double avg = sum / cnt;
|
||||
printf("\n═══════════════════════════════════════════\n");
|
||||
printf(" INT8 平均: %.3f GOPS\n", avg);
|
||||
printf("═══════════════════════════════════════════\n");
|
||||
|
||||
printf("\n 模型帧率预估 (INT8):\n");
|
||||
printf(" %-30s %8s %8s\n", "模型", "MACs(M)", "FPS");
|
||||
printf(" ----------------------------------------------------\n");
|
||||
|
||||
struct { const char* n; double m; } models[] = {
|
||||
{"TinyCNN 40x30x3 -> 8ch k3", 3},
|
||||
{"TinyCNN 40x30x8 -> 16ch k3", 8},
|
||||
{"TinyCNN 40x30x16 -> 32ch k3", 18},
|
||||
{"TinyCNN 56x56x3 -> 16ch k3", 12},
|
||||
{"TinyCNN 56x56x16 -> 32ch k3", 45},
|
||||
{"MicroNet 32x32x8, 3层", 5},
|
||||
{"MicroNet 32x32x16, 3层", 15},
|
||||
{"MicroNet 48x48x8, 3层", 12},
|
||||
{"MicroNet 48x48x16, 3层", 40},
|
||||
{"ShuffleNetV2 0.5x", 41},
|
||||
{"MobileNetV2 0.35x 96x96", 35},
|
||||
{"FC-128 (分类头)", 0.1},
|
||||
};
|
||||
|
||||
for (auto& m : models)
|
||||
printf(" %-30s %8.0f %8.1f\n", m.n, m.m, avg * 1e3 / m.m);
|
||||
|
||||
printf("\n >>> 稳 30FPS 上限: %.0fM MACs (INT8 %.3f GOPS)\n\n", avg * 1e3 / 30.0, avg);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user