Files
Loongson_2k0300_SmartCar_Fr…/model/model.cpp

293 lines
11 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "model.hpp"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <new>
#include <algorithm>
// ============================================================
// 内存预分配
// ============================================================
// Pre-allocated scratch memory for one forward pass.
// Each member is a flat float array whose length is written as
// channels * height * width; the inference code indexes them channel-major.
struct M {
    float stem[8 * 60 * 80];      // output of the stem convolution
    float b1[16 * 60 * 80];       // output of block1
    float b1_tmp[16 * 60 * 80];   // not referenced by the code visible in this file
    float b2[28 * 30 * 40];       // output of block2
    float b2_dw[16 * 30 * 40];    // not referenced by the code visible in this file
    float b3[40 * 30 * 40];       // output of block3
    float b3_dw[28 * 30 * 40];    // not referenced by the code visible in this file
    float b4[56 * 15 * 20];       // output of block4
    float b4_dw[40 * 15 * 20];    // not referenced by the code visible in this file
    float b5[64 * 15 * 20];       // output of block5
    float b5_dw[56 * 15 * 20];    // not referenced by the code visible in this file
    float sh[24 * 15 * 20];       // output of the shared 1x1 conv feeding both heads
    float cls[4 * 15 * 20];       // classification head output (4 channels)
    float sz[2 * 15 * 20];        // size head output (2 channels)
    // Shared scratch for every block's depthwise output and skip branch.
    // Sized for the largest user: the block1 skip tensor, 16 x 60 x 80.
    float dw_out[16 * 60 * 80];
};

// The single workspace instance; stays null until the model is initialised.
static M* m = nullptr;
// ============================================================
// 权重表
// ============================================================
// One named tensor loaded from the weight file.
struct WT {
    char   n[64];  // NUL-terminated tensor name (compared with strcmp)
    int    nd;     // number of dimensions stored for this tensor
    int    s[4];   // extent of each dimension
    float* d;      // tensor data
};

static WT* gw = nullptr;  // weight table
static int gn = 0;        // number of entries in gw

// Finds the tensor called `name` in the weight table by linear search.
// Returns its data pointer, or nullptr (after logging a MISS line) when
// no entry has that name.
static float* wf(const char* name)
{
    int idx = 0;
    while (idx < gn) {
        WT& entry = gw[idx];
        if (std::strcmp(entry.n, name) == 0) {
            return entry.d;
        }
        ++idx;
    }
    printf("[MODEL] MISS %s\n", name);
    return nullptr;
}
// ============================================================
// 基础算子
// ============================================================
// In-place ReLU over the first N floats of x: every element that compares
// less than zero is overwritten with 0; everything else is left untouched.
static void relu_f(float* x, int N)
{
    if (N <= 0) {
        return;
    }
    std::replace_if(x, x + N, [](float v) { return v < 0; }, 0.0f);
}
// In-place logistic sigmoid over the first N floats of x:
// each element v becomes 1 / (1 + exp(-v)).
static void sigmoid_f(float* x, int N)
{
    if (N <= 0) {
        return;
    }
    std::transform(x, x + N, x,
                   [](float v) { return 1.0f / (1.0f + std::exp(-v)); });
}
// Direct (naive) 2-D convolution on channel-major float tensors.
//
//   o    : output, oC planes of (H/str) x (W/str) floats
//   in   : input, iC planes of H x W floats
//   w    : filters laid out as [oC][iC/grp][K][K]
//   bias : one value per output channel, or nullptr for no bias
//   K    : square kernel size; the input is zero-padded by K/2 on each side
//   str  : stride in both directions
//   grp  : number of groups; iC and oC are split evenly across them
//          (grp == iC == oC gives a depthwise convolution)
//
// Taps that fall outside the input are skipped, which is equivalent to
// zero padding.
static void conv2d(float* o, const float* in, const float* w, const float* bias,
                   int H, int W, int iC, int oC, int K, int str, int grp)
{
    const int Ho = H / str, Wo = W / str, pad = K / 2;
    const int icpg = iC / grp;   // input channels seen by each output channel
    const int ocpg = oC / grp;   // output channels produced by each group
    const int ksz = K * K;

    for (int g = 0; g < grp; ++g) {
        for (int oc = 0; oc < ocpg; ++oc) {
            const int occ = g * ocpg + oc;
            float* oo = o + occ * Ho * Wo;
            // Each output channel owns only icpg kernel planes, so its filter
            // starts at occ*icpg*K*K. Indexing with the full iC (as if the
            // tensor were [oC][iC][K][K]) is only right for grp == 1 and
            // reads outside the filter tensor for grouped/depthwise layers.
            const float* wo = w + occ * icpg * ksz;

            for (int y = 0; y < Ho; ++y) {
                for (int x = 0; x < Wo; ++x) {
                    float s = bias ? bias[occ] : 0;
                    for (int ic = 0; ic < icpg; ++ic) {
                        const float* ww = wo + ic * ksz;
                        const float* ii = in + (g * icpg + ic) * H * W;
                        for (int ky = 0; ky < K; ++ky) {
                            const int iy = y * str + ky - pad;
                            if (iy < 0 || iy >= H) continue;
                            for (int kx = 0; kx < K; ++kx) {
                                const int ix = x * str + kx - pad;
                                if (ix < 0 || ix >= W) continue;
                                s += ww[ky * K + kx] * ii[iy * W + ix];
                            }
                        }
                    }
                    oo[y * Wo + x] = s;
                }
            }
        }
    }
}
// In-place inference-time batch normalisation of a channel-major tensor.
//
//   x        : C planes of N floats each, normalised in place
//   w, b     : per-channel scale and shift; a null pointer means 1 and 0
//   mean,var : per-channel statistics (required)
//   C        : number of channels
//   N        : number of elements in ONE channel plane
//
// Each element becomes (x - mean) / sqrt(var + 1e-5) * w + b.
static void batchnorm_f(float* x, const float* w, const float* b,
                        const float* mean, const float* var, int C, int N)
{
    const float eps = 1e-5f;
    for (int ch = 0; ch < C; ++ch) {
        const float gamma = w ? w[ch] : 1;
        const float beta = b ? b[ch] : 0;
        const float inv_std = 1.0f / std::sqrt(var[ch] + eps);
        float* plane = x + ch * N;
        int k = 0;
        while (k < N) {
            plane[k] = (plane[k] - mean[ch]) * inv_std * gamma + beta;
            ++k;
        }
    }
}
// Convolution followed by batch normalisation and an optional ReLU.
//
//   o, in          : output / input tensors (channel-major)
//   H, W           : input height and width
//   cw, cb         : convolution weights and bias (cb may be nullptr)
//   bn_w, bn_b     : batch-norm scale and shift (either may be nullptr)
//   bn_m, bn_v     : batch-norm running mean and variance
//   iC, oC         : input / output channel counts
//   K, str, grp    : kernel size, stride and group count, passed to conv2d
//   relu_flag      : apply ReLU to the result when true
static void conv_bn(float* o, const float* in, int H, int W,
                    const float* cw, const float* cb,
                    const float* bn_w, const float* bn_b,
                    const float* bn_m, const float* bn_v,
                    int iC, int oC, int K, int str, int grp, bool relu_flag)
{
    // Forward the convolution bias instead of dropping it; callers that have
    // no bias pass nullptr and get exactly the previous behaviour.
    conv2d(o, in, cw, cb, H, W, iC, oC, K, str, grp);

    // Elements per output channel plane.
    const int N = (H / str) * (W / str);
    batchnorm_f(o, bn_w, bn_b, bn_m, bn_v, oC, N);
    if (relu_flag) {
        relu_f(o, oC * N);
    }
}
// Global average pooling: for each of the C channel planes of `in`
// (H*W floats each) writes the arithmetic mean of that plane to o[channel].
static void gap(float* o, const float* in, int C, int H, int W)
{
    const int count = H * W;
    for (int ch = 0; ch < C; ++ch) {
        const float* plane = in + ch * count;
        float total = 0;
        int k = 0;
        while (k < count) {
            total += plane[k];
            ++k;
        }
        o[ch] = total / static_cast<float>(count);
    }
}
// In-place softmax across the channel axis of a channel-major tensor.
//
//   x : C planes of N floats; element (c, i) lives at x[c*N + i]
//   C : number of channels the softmax runs over
//   N : number of positions per plane; each position is normalised
//       independently
//
// The per-position maximum is subtracted before exponentiation for numerical
// stability. The maximum is seeded from the first channel rather than from a
// fixed sentinel, so positions whose logits are all extremely negative still
// normalise to a valid distribution instead of dividing by zero.
static void softmax_c(float* x, int C, int N)
{
    if (C <= 0) {
        return;  // nothing to normalise
    }
    for (int i = 0; i < N; ++i) {
        float* col = x + i;  // channel c of this position is col[c*N]

        float mx = col[0];
        for (int c = 1; c < C; ++c) {
            mx = std::max(mx, col[c * N]);
        }

        float sum = 0;
        for (int c = 0; c < C; ++c) {
            const float e = std::exp(col[c * N] - mx);
            col[c * N] = e;
            sum += e;
        }
        for (int c = 0; c < C; ++c) {
            col[c * N] /= sum;
        }
    }
}
// ============================================================
// SE-ResDW Block
// ============================================================
// One residual depthwise-separable block with squeeze-and-excitation.
//
//   o     : output tensor, oC planes of (H/str) x (W/str)
//   in    : input tensor, iC planes of H x W
//   iC,oC : input / output channel counts (oC must not exceed 64)
//   H, W  : input height and width
//   str   : stride of the depthwise conv and of the skip projection
//   pfx   : weight-name prefix; parameters are looked up as "<pfx>.<suffix>"
//
// Pipeline: depthwise 3x3 conv + BN + ReLU -> pointwise 1x1 conv + BN + ReLU
// -> add skip branch (identity when the shape is unchanged, otherwise a
// strided 1x1 conv + BN) -> SE channel gating -> ReLU.
// Uses m->dw_out as scratch for the depthwise output and the projected skip.
static void se_block(float* o, const float* in, int iC, int oC, int H, int W, int str, const char* pfx)
{
    constexpr int kMaxC = 64;  // capacity of the SE stack buffers below
    const int Ho = H / str, Wo = W / str, No = Ho * Wo;
    if (oC > kMaxC) {
        printf("[MODEL] se_block %s: %d channels exceed SE capacity %d\n", pfx, oC, kMaxC);
        return;
    }

    // Resolve "<pfx>.<suffix>". Every lookup is its own statement: the name
    // buffer is shared, so doing several lookups inside one call's argument
    // list would make the result depend on argument evaluation order.
    char na[128];
    auto P = [&](const char* suffix) -> float* {
        std::snprintf(na, sizeof(na), "%s.%s", pfx, suffix);
        return wf(na);
    };

    // depthwise conv + BN + ReLU
    const float* dw_w   = P("dw.0.weight");
    const float* dw_g   = P("dw.1.weight");
    const float* dw_b   = P("dw.1.bias");
    const float* dw_mu  = P("dw.1.running_mean");
    const float* dw_var = P("dw.1.running_var");
    conv2d(m->dw_out, in, dw_w, nullptr, H, W, iC, iC, 3, str, iC);
    // batchnorm_f takes the element count of ONE channel plane.
    batchnorm_f(m->dw_out, dw_g, dw_b, dw_mu, dw_var, iC, No);
    relu_f(m->dw_out, iC * No);

    // pointwise conv + BN + ReLU (to output)
    const float* pw_w   = P("pw.0.weight");
    const float* pw_g   = P("pw.1.weight");
    const float* pw_b   = P("pw.1.bias");
    const float* pw_mu  = P("pw.1.running_mean");
    const float* pw_var = P("pw.1.running_var");
    conv2d(o, m->dw_out, pw_w, nullptr, Ho, Wo, iC, oC, 1, 1, 1);
    batchnorm_f(o, pw_g, pw_b, pw_mu, pw_var, oC, No);
    relu_f(o, oC * No);

    // skip (no ReLU): identity reads the input directly; otherwise project
    // into dw_out, which is free again now that the pointwise conv has
    // consumed the depthwise result.
    const float* skip = in;
    if (!(str == 1 && iC == oC)) {
        float* proj = m->dw_out;
        const float* sk_w   = P("skip.0.weight");
        const float* sk_g   = P("skip.1.weight");
        const float* sk_b   = P("skip.1.bias");
        const float* sk_mu  = P("skip.1.running_mean");
        const float* sk_var = P("skip.1.running_var");
        conv2d(proj, in, sk_w, nullptr, H, W, iC, oC, 1, str, 1);
        batchnorm_f(proj, sk_g, sk_b, sk_mu, sk_var, oC, No);
        skip = proj;
    }

    // add skip
    for (int i = 0; i < oC * No; ++i) o[i] += skip[i];

    // SE: the input is o, i.e. the sum of the pointwise and skip branches.
    float se_avg[kMaxC];
    gap(se_avg, o, oC, Ho, Wo);
    const int rc = oC / 4;  // reduced channel count of the SE bottleneck
    float se1[kMaxC / 4], se2[kMaxC];

    const float* se1w = P("se.1.weight");
    const float* se1b = P("se.1.bias");
    for (int i = 0; i < rc; ++i) {
        float s = se1b ? se1b[i] : 0;
        for (int j = 0; j < oC; ++j) s += se1w[i * oC + j] * se_avg[j];
        se1[i] = s;
    }
    relu_f(se1, rc);

    const float* se2w = P("se.3.weight");
    const float* se2b = P("se.3.bias");
    for (int i = 0; i < oC; ++i) {
        float s = se2b ? se2b[i] : 0;
        for (int j = 0; j < rc; ++j) s += se2w[i * rc + j] * se1[j];
        se2[i] = s;
    }
    sigmoid_f(se2, oC);

    // Scale every channel plane by its gate, then the block's final ReLU.
    for (int c = 0; c < oC; ++c) {
        const float sc = se2[c];
        float* oc = o + c * No;
        for (int i = 0; i < No; ++i) oc[i] *= sc;
    }
    relu_f(o, oC * No);
}
// ============================================================
// 检测后处理
// ============================================================
// Output grid height/width, grid stride in input pixels, and the number of
// object classes considered when picking the best class for a cell.
static constexpr int OH=15, OW=20, ST=8, NC=3;
// Per-class reference {width, height}; the size-head outputs are multiplied
// by these to obtain the box size.
static constexpr float RS[3][2] = {{75.36f,62.15f},{49.79f,38.63f},{67.59f,34.94f}};

// Turns the raw head outputs in m->cls / m->sz into detection boxes.
//
//   boxes : caller-provided array receiving the detections
//   max   : capacity of `boxes`
//   th    : minimum class score for a cell to be reported
//
// Returns the number of boxes written (0 when `boxes` is null or max <= 0).
//
// m->cls is softmaxed in place over its 4 channels; only the first NC
// channels compete for the cell's class (the remaining channel is presumably
// background -- TODO confirm against the training code). A cell is reported
// only if its score reaches `th` and no 8-neighbour has a higher score for
// the same class.
static int decode(DetectBox* boxes, int max, float th)
{
    if (!boxes || max <= 0) return 0;

    const int N = OH * OW;
    softmax_c(m->cls, 4, N);

    int cnt = 0;
    for (int gy = 0; gy < OH && cnt < max; ++gy) {
        for (int gx = 0; gx < OW && cnt < max; ++gx) {
            const int idx = gy * OW + gx;

            // Best-scoring class for this cell.
            int bc = -1;
            float bs = 0;
            for (int c = 0; c < NC; ++c) {
                const float s = m->cls[c * N + idx];
                if (s > bs) { bs = s; bc = c; }
            }
            // bc stays -1 when no score is strictly positive; that cell must
            // be skipped even for th <= 0, otherwise bc would index m->cls
            // and RS out of bounds below.
            if (bc < 0 || bs < th) continue;

            // Keep only local maxima within the 3x3 neighbourhood.
            bool pk = true;
            for (int dy = -1; dy <= 1 && pk; ++dy) {
                for (int dx = -1; dx <= 1 && pk; ++dx) {
                    const int ny = gy + dy, nx = gx + dx;
                    if (ny < 0 || ny >= OH || nx < 0 || nx >= OW) continue;
                    if (m->cls[bc * N + ny * OW + nx] > bs) pk = false;
                }
            }
            if (!pk) continue;

            const float pw = m->sz[0 * N + idx], ph = m->sz[1 * N + idx];
            boxes[cnt].cls = bc;
            boxes[cnt].conf = bs;
            // Cell centre scaled by the grid stride.
            boxes[cnt].cx = ((float)gx + 0.5f) * ST;
            boxes[cnt].cy = ((float)gy + 0.5f) * ST;
            boxes[cnt].w = pw * RS[bc][0];
            boxes[cnt].h = ph * RS[bc][1];
            cnt++;
        }
    }
    return cnt;
}
// ============================================================
// 前向推理
// ============================================================
static void forward(const uint8* bgr)
{
// 预处理 BGR→RGB float [0,1], CHW (堆分配, 避免栈溢出)
float* in = new float[3*120*160];
for (int c=0; c<3; ++c) {
int sc=2-c; float* ch=in+c*120*160;
for (int y=0; y<120; ++y) {
const uint8* row=bgr+y*160*3;
for (int x=0; x<160; ++x) ch[y*160+x]=(float)row[x*3+sc]/255.0f;
}
}
// stem
conv_bn(m->stem, in, 120,160, wf("stem.0.weight"),nullptr,
wf("stem.1.weight"),wf("stem.1.bias"),
wf("stem.1.running_mean"),wf("stem.1.running_var"),
3,8,3,2,1,true);
se_block(m->b1, m->stem, 8,16, 60,80, 1,"block1");
se_block(m->b2, m->b1, 16,28, 60,80, 2,"block2");
se_block(m->b3, m->b2, 28,40, 30,40, 1,"block3");
se_block(m->b4, m->b3, 40,56, 30,40, 2,"block4");
se_block(m->b5, m->b4, 56,64, 15,20, 1,"block5");
// shared
conv_bn(m->sh, m->b5, 15,20, wf("shared.0.weight"),nullptr,
wf("shared.1.weight"),wf("shared.1.bias"),
wf("shared.1.running_mean"),wf("shared.1.running_var"),
64,24,1,1,1,true);
// heads (1x1 conv, 无 BN/ReLU)
conv2d(m->cls, m->sh, wf("cls_head.weight"), wf("cls_head.bias"), 15,20, 24,4, 1,1,1);
conv2d(m->sz, m->sh, wf("size_head.weight"),wf("size_head.bias"),15,20, 24,2, 1,1,1);
delete[] in;
}
// ============================================================
// 接口
// ============================================================
static bool g_rdy = false;
bool model_init(const char* path) {
FILE* f=fopen(path,"rb");
if(!f){ printf("[MODEL] open fail: %s\n",path); return false; }
fread(&gn,sizeof(int),1,f);
gw=new WT[gn];
for(int i=0;i<gn;++i){ WT& t=gw[i]; int nl; fread(&nl,4,1,f); fread(t.n,1,nl,f); t.n[nl]=0;
fread(&t.nd,4,1,f); int tot=1; for(int d=0;d<t.nd;++d){ fread(&t.s[d],4,1,f); tot*=t.s[d]; }
for(int d=t.nd;d<4;++d) t.s[d]=1;
t.d=new float[tot]; fread(t.d,4,tot,f);
}
fclose(f);
m=new (std::nothrow) M();
if (!m) { printf("[MODEL] OOM: 无法分配推理内存\n"); fclose(f); delete[] gw; gw=nullptr; return false; }
std::memset(m,0,sizeof(M));
g_rdy=true; printf("[MODEL] load ok: %d layers\n",gn); return true;
}
// Runs detection on one frame.
//
//   bgr   : interleaved BGR image, 3 bytes per pixel
//   w, h  : image size; only 160 x 120 is accepted
//   boxes : caller-provided output array
//   max   : capacity of `boxes`
//   th    : minimum class score for a detection
//
// Returns the number of boxes written. Returns 0 without running inference
// when the model is not initialised, the size is wrong, a pointer is null,
// or max <= 0.
int model_detect(const uint8* bgr, int w, int h, DetectBox* boxes, int max, float th) {
    if (!g_rdy || w != 160 || h != 120) return 0;
    // Nothing could be read or stored; skip the forward pass instead of
    // dereferencing a null pointer.
    if (!bgr || !boxes || max <= 0) return 0;
    forward(bgr);
    return decode(boxes, max, th);
}
// Releases the weight table and the inference workspace and marks the model
// as not ready. Safe to call when nothing is loaded.
void model_deinit() {
    // Mark the model unusable before tearing anything down.
    g_rdy = false;
    if (gw) {
        for (int i = 0; i < gn; ++i) delete[] gw[i].d;
        delete[] gw;
        gw = nullptr;
    }
    // Keep the entry count in step with the (now empty) table so a later
    // lookup cannot walk a null table.
    gn = 0;
    delete m;
    m = nullptr;
}
// Reports whether the model is currently initialised (the g_rdy flag).
bool model_ready()
{
    const bool initialised = g_rdy;
    return initialised;
}