Add async model thread, vision FPS throttle, configurable FPS, .gitignore __pycache__
This commit is contained in:
288
model/model.cpp
288
model/model.cpp
@@ -1,13 +1,287 @@
|
||||
#include "model.hpp"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
bool model_init()
|
||||
{
|
||||
// TODO: 加载 TFLite 模型,初始化解释器
|
||||
return false;
|
||||
// ============================================================
|
||||
// 内存预分配
|
||||
// ============================================================
|
||||
// Pre-allocated activation buffers for one forward pass.
// Every array is a flat, channel-major float tensor of
// channels * height * width elements.
struct M {
    float stem[8 * 60 * 80];     // stem output
    float b1[16 * 60 * 80];      // block1 output
    float b1_tmp[16 * 60 * 80];  // not referenced by the code visible in this file
    float b2[28 * 30 * 40];      // block2 output
    float b2_dw[16 * 30 * 40];   // not referenced by the code visible in this file
    float b3[40 * 30 * 40];      // block3 output
    float b3_dw[28 * 30 * 40];   // not referenced by the code visible in this file
    float b4[56 * 15 * 20];      // block4 output
    float b4_dw[40 * 15 * 20];   // not referenced by the code visible in this file
    float b5[64 * 15 * 20];      // block5 output
    float b5_dw[56 * 15 * 20];   // not referenced by the code visible in this file
    float sh[24 * 15 * 20];      // shared 1x1 conv output feeding both heads
    float cls[4 * 15 * 20];      // class head output (softmaxed in place by decode)
    float sz[2 * 15 * 20];       // size head output
    // Scratch shared by se_block(): first the depthwise output, then the skip
    // branch. Holds 64*60*80 = 307200 floats (an earlier note claiming
    // "= 38400" did not match the declared size).
    float dw_out[64 * 60 * 80];
};

// Single buffer set; allocated by model_init(), released by model_deinit().
static M* m = nullptr;
|
||||
|
||||
// ============================================================
|
||||
// 权重表
|
||||
// ============================================================
|
||||
// One named tensor read from the weight file.
struct WT {
    char   n[64];  // tensor name, NUL-terminated by the loader
    int    nd;     // number of dimensions stored in the file
    int    s[4];   // shape; the loader pads unused trailing entries with 1
    float* d;      // heap-allocated element data
};

// Weight table filled by model_init(): `gn` entries starting at `gw`.
static WT* gw = nullptr;
static int gn = 0;
|
||||
|
||||
static float* wf(const char* name) {
|
||||
for (int i = 0; i < gn; ++i) if (!std::strcmp(gw[i].n, name)) return gw[i].d;
|
||||
printf("[MODEL] MISS %s\n", name); return nullptr;
|
||||
}
|
||||
|
||||
void model_infer(const uint8* image, int w, int h, float* output, int num_classes)
|
||||
// ============================================================
|
||||
// 基础算子
|
||||
// ============================================================
|
||||
// In-place ReLU: every strictly negative entry of x[0..N) becomes 0.
static void relu_f(float* x, int N) {
    int k = 0;
    while (k < N) {
        float& v = x[k];
        if (v < 0) {
            v = 0;
        }
        ++k;
    }
}
|
||||
// In-place logistic sigmoid over x[0..N).
static void sigmoid_f(float* x, int N) {
    int k = 0;
    while (k < N) {
        const float v = x[k];
        x[k] = 1.0f/(1.0f+std::exp(-v));
        ++k;
    }
}
|
||||
|
||||
// 2-D convolution over a channel-major (C, H, W) float tensor.
//
//   o    : output, oC * (H/str) * (W/str) floats
//   in   : input, iC * H * W floats
//   w    : kernel laid out as [oC][iC/grp][K][K]
//   bias : optional per-output-channel bias (nullptr = none)
//   K    : square kernel size; the input is zero-padded by K/2 on every side
//   str  : stride; the output is (H/str) x (W/str)
//   grp  : group count (1 = dense convolution, iC = depthwise)
//
// o, in and w must be non-null; o must not alias in.
static void conv2d(float* o, const float* in, const float* w, const float* bias,
                   int H, int W, int iC, int oC, int K, int str, int grp)
{
    if (str <= 0 || grp <= 0) return;  // the divisions below would be undefined

    const int Ho = H / str, Wo = W / str, pad = K / 2;
    const int icpg = iC / grp, ocpg = oC / grp;
    const int kk = K * K;

    for (int g = 0; g < grp; ++g) {
        for (int oc = 0; oc < ocpg; ++oc) {
            const int occ = g * ocpg + oc;
            float* oo = o + occ * Ho * Wo;
            // Each output channel owns icpg filters, not iC: a grouped kernel
            // only stores the input channels of its own group.
            const float* wo = w + occ * icpg * kk;

            for (int y = 0; y < Ho; ++y) for (int x = 0; x < Wo; ++x) {
                float s = bias ? bias[occ] : 0.0f;
                for (int ic = 0; ic < icpg; ++ic) {
                    const int icc = g * icpg + ic;
                    const float* ww = wo + ic * kk;
                    const float* ii = in + icc * H * W;
                    for (int ky = 0; ky < K; ++ky) {
                        const int iy = y * str + ky - pad;
                        if (iy < 0 || iy >= H) continue;   // zero padding
                        for (int kx = 0; kx < K; ++kx) {
                            const int ix = x * str + kx - pad;
                            if (ix < 0 || ix >= W) continue;
                            s += ww[ky * K + kx] * ii[iy * W + ix];
                        }
                    }
                }
                oo[y * Wo + x] = s;
            }
        }
    }
}
|
||||
|
||||
// In-place inference-time batch normalisation of a channel-major tensor.
//
//   x    : C channels of N contiguous floats each
//   w, b : optional per-channel scale / shift (nullptr -> 1 / 0)
//   mean : per-channel mean (required)
//   var  : per-channel variance (required)
//   N    : number of elements in ONE channel
static void batchnorm_f(float* x, const float* w, const float* b,
                        const float* mean, const float* var, int C, int N)
{
    const float eps = 1e-5f;
    for (int ch = 0; ch < C; ++ch) {
        float gamma = 1;
        if (w) gamma = w[ch];
        float beta = 0;
        if (b) beta = b[ch];
        const float inv_std = 1.0f/std::sqrt(var[ch]+eps);

        float* plane = x + ch*N;
        for (float* p = plane; p != plane + N; ++p) {
            *p = (*p-mean[ch])*inv_std*gamma + beta;
        }
    }
}
|
||||
|
||||
// conv + BN + optional ReLU
|
||||
static void conv_bn(float* o, const float* in, int H, int W,
|
||||
const float* cw, const float* cb,
|
||||
const float* bn_w, const float* bn_b,
|
||||
const float* bn_m, const float* bn_v,
|
||||
int iC, int oC, int K, int str, int grp, bool relu_flag)
|
||||
{
|
||||
conv2d(o, in, cw, nullptr, H, W, iC, oC, K, str, grp);
|
||||
int N = (H/str)*(W/str);
|
||||
batchnorm_f(o, bn_w, bn_b, bn_m, bn_v, oC, N);
|
||||
if (relu_flag) relu_f(o, oC*N);
|
||||
}
|
||||
|
||||
// Global average pooling: o[c] is the mean of the H*W values of channel c
// of the channel-major tensor `in`.
static void gap(float* o, const float* in, int C, int H, int W) {
    const int plane = H*W;
    const float* src = in;
    for (int ch = 0; ch < C; ++ch, src += plane) {
        float acc = 0;
        for (int k = 0; k < plane; ++k) {
            acc += src[k];
        }
        o[ch] = acc/(float)plane;
    }
}
|
||||
|
||||
// In-place softmax across the channel axis of a channel-major tensor.
//
//   x : C channels of N floats; element (c, i) lives at x[c*N + i]
//
// For every position i the C values are replaced by their softmax.
static void softmax_c(float* x, int C, int N) {
    if (C <= 0) return;

    for (int i = 0; i < N; ++i) {
        // Seed the running max with a real element rather than a magic floor:
        // with a fixed -1e9 floor, a column lying entirely below it made every
        // exp() underflow to 0 and the division produce NaN.
        float mx = x[i];
        for (int c = 1; c < C; ++c) mx = std::max(mx, x[c * N + i]);

        float sum = 0;
        for (int c = 0; c < C; ++c) {
            float& v = x[c * N + i];
            v = std::exp(v - mx);   // max subtracted for numerical stability
            sum += v;
        }
        for (int c = 0; c < C; ++c) x[c * N + i] /= sum;
    }
}
|
||||
|
||||
// ============================================================
|
||||
// SE-ResDW Block
|
||||
// ============================================================
|
||||
static void se_block(float* o, const float* in, int iC, int oC, int H, int W, int str, const char* pfx)
|
||||
{
|
||||
int Ho=H/str, Wo=W/str, No=Ho*Wo;
|
||||
char na[128];
|
||||
|
||||
// depthwise conv + BN + ReLU
|
||||
std::snprintf(na,128,"%s.dw.0.weight",pfx);
|
||||
conv2d(m->dw_out, in, wf(na), nullptr, H, W, iC, iC, 3, str, iC);
|
||||
std::snprintf(na,128,"%s.dw.1.weight",pfx);
|
||||
batchnorm_f(m->dw_out, wf(na), wf((std::snprintf(na,128,"%s.dw.1.bias",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.dw.1.running_mean",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.dw.1.running_var",pfx),na)), iC, iC*No);
|
||||
relu_f(m->dw_out, iC*No);
|
||||
|
||||
// pointwise conv + BN + ReLU (to output)
|
||||
std::snprintf(na,128,"%s.pw.0.weight",pfx);
|
||||
conv2d(o, m->dw_out, wf(na), nullptr, Ho, Wo, iC, oC, 1, 1, 1);
|
||||
std::snprintf(na,128,"%s.pw.1.weight",pfx);
|
||||
batchnorm_f(o, wf(na), wf((std::snprintf(na,128,"%s.pw.1.bias",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.pw.1.running_mean",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.pw.1.running_var",pfx),na)), oC, oC*No);
|
||||
relu_f(o, oC*No);
|
||||
|
||||
// skip (no ReLU)
|
||||
float* skip = m->dw_out; // reuse buffer
|
||||
if (str==1 && iC==oC) {
|
||||
std::memcpy(skip, in, iC*H*W*4);
|
||||
} else {
|
||||
std::snprintf(na,128,"%s.skip.0.weight",pfx);
|
||||
conv2d(skip, in, wf(na), nullptr, H, W, iC, oC, 1, str, 1);
|
||||
std::snprintf(na,128,"%s.skip.1.weight",pfx);
|
||||
batchnorm_f(skip, wf(na), wf((std::snprintf(na,128,"%s.skip.1.bias",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.skip.1.running_mean",pfx),na)),
|
||||
wf((std::snprintf(na,128,"%s.skip.1.running_var",pfx),na)), oC, oC*No);
|
||||
}
|
||||
|
||||
// add skip
|
||||
for (int i=0; i<oC*No; ++i) o[i] += skip[i];
|
||||
|
||||
// SE (输入是 o, 即 pw+skip 的和)
|
||||
float se_avg[64]; gap(se_avg, o, oC, Ho, Wo);
|
||||
|
||||
int rc = oC/4;
|
||||
float se1[16], se2[64];
|
||||
std::snprintf(na,128,"%s.se.1.weight",pfx);
|
||||
float* se1w=wf(na); std::snprintf(na,128,"%s.se.1.bias",pfx); float* se1b=wf(na);
|
||||
for (int i=0; i<rc; ++i) { float s=se1b?se1b[i]:0; for (int j=0;j<oC;++j) s+=se1w[i*oC+j]*se_avg[j]; se1[i]=s; }
|
||||
relu_f(se1, rc);
|
||||
|
||||
std::snprintf(na,128,"%s.se.3.weight",pfx);
|
||||
float* se2w=wf(na); std::snprintf(na,128,"%s.se.3.bias",pfx); float* se2b=wf(na);
|
||||
for (int i=0; i<oC; ++i) { float s=se2b?se2b[i]:0; for (int j=0;j<rc;++j) s+=se2w[i*rc+j]*se1[j]; se2[i]=s; }
|
||||
sigmoid_f(se2, oC);
|
||||
|
||||
for (int c=0; c<oC; ++c) { float sc=se2[c]; float* oc=o+c*No; for (int i=0;i<No;++i) oc[i]*=sc; }
|
||||
relu_f(o, oC*No);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// 检测后处理
|
||||
// ============================================================
|
||||
// Detection grid and decoding constants used by decode().
static constexpr int OH = 15;  // grid rows
static constexpr int OW = 20;  // grid columns
static constexpr int ST = 8;   // pixels per grid cell
static constexpr int NC = 3;   // class channels scanned for detections
// Per-class {width, height} multipliers applied to the size-head output.
static constexpr float RS[3][2] = {
    {75.36f, 62.15f},
    {49.79f, 38.63f},
    {67.59f, 34.94f},
};
|
||||
|
||||
// Turns the raw head outputs in m->cls / m->sz into detection boxes.
//
//   boxes : caller-provided array with room for `max` entries
//   max   : maximum number of boxes to emit
//   th    : minimum class score for a cell to be reported
//
// Returns the number of boxes written. Cells are scanned row by row and the
// scan stops as soon as `max` boxes have been produced.
//
// Side effect: m->cls is softmaxed in place over its 4 channels, so this must
// run exactly once per forward pass. Only the first NC channels are treated as
// object classes (the remaining one is presumably background -- confirm
// against the training code).
static int decode(DetectBox* boxes, int max, float th)
{
    const int N = OH * OW;
    softmax_c(m->cls, 4, N);
    if (!boxes) return 0;

    int cnt = 0;
    for (int gy = 0; gy < OH && cnt < max; ++gy)
        for (int gx = 0; gx < OW && cnt < max; ++gx)
        {
            const int idx = gy * OW + gx;

            // best-scoring object class in this cell
            int bc = -1; float bs = 0;
            for (int c = 0; c < NC; ++c) {
                const float s = m->cls[c * N + idx];
                if (s > bs) { bs = s; bc = c; }
            }
            // bc stays -1 when no score is above 0; it must never be used as
            // an index, even if th <= 0 lets the score test pass.
            if (bc < 0 || bs < th) continue;

            // keep only local maxima of that class within the 3x3 neighbourhood
            bool pk = true;
            for (int dy = -1; dy <= 1 && pk; ++dy)
                for (int dx = -1; dx <= 1 && pk; ++dx)
                {
                    const int ny = gy + dy, nx = gx + dx;
                    if (ny < 0 || ny >= OH || nx < 0 || nx >= OW) continue;
                    if (m->cls[bc * N + ny * OW + nx] > bs) pk = false;
                }
            if (!pk) continue;

            const float pw = m->sz[0 * N + idx], ph = m->sz[1 * N + idx];
            boxes[cnt].cls = bc; boxes[cnt].conf = bs;
            // box centre = centre of the grid cell, in input pixels
            boxes[cnt].cx = ((float)gx + 0.5f) * ST; boxes[cnt].cy = ((float)gy + 0.5f) * ST;
            // predicted size is relative to the per-class reference size
            boxes[cnt].w = pw * RS[bc][0]; boxes[cnt].h = ph * RS[bc][1];
            cnt++;
        }
    return cnt;
}
|
||||
|
||||
// ============================================================
|
||||
// 前向推理
|
||||
// ============================================================
|
||||
static void forward(const uint8* bgr)
|
||||
{
|
||||
// 预处理 BGR→RGB float [0,1], CHW
|
||||
float in[3*120*160];
|
||||
for (int c=0; c<3; ++c) {
|
||||
int sc=2-c; float* ch=in+c*120*160;
|
||||
for (int y=0; y<120; ++y) {
|
||||
const uint8* row=bgr+y*160*3;
|
||||
for (int x=0; x<160; ++x) ch[y*160+x]=(float)row[x*3+sc]/255.0f;
|
||||
}
|
||||
}
|
||||
|
||||
// stem
|
||||
conv_bn(m->stem, in, 120,160, wf("stem.0.weight"),nullptr,
|
||||
wf("stem.1.weight"),wf("stem.1.bias"),
|
||||
wf("stem.1.running_mean"),wf("stem.1.running_var"),
|
||||
3,8,3,2,1,true);
|
||||
|
||||
se_block(m->b1, m->stem, 8,16, 60,80, 1,"block1");
|
||||
se_block(m->b2, m->b1, 16,28, 60,80, 2,"block2");
|
||||
se_block(m->b3, m->b2, 28,40, 30,40, 1,"block3");
|
||||
se_block(m->b4, m->b3, 40,56, 30,40, 2,"block4");
|
||||
se_block(m->b5, m->b4, 56,64, 15,20, 1,"block5");
|
||||
|
||||
// shared
|
||||
conv_bn(m->sh, m->b5, 15,20, wf("shared.0.weight"),nullptr,
|
||||
wf("shared.1.weight"),wf("shared.1.bias"),
|
||||
wf("shared.1.running_mean"),wf("shared.1.running_var"),
|
||||
64,24,1,1,1,true);
|
||||
|
||||
// heads (1x1 conv, 无 BN/ReLU)
|
||||
conv2d(m->cls, m->sh, wf("cls_head.weight"), wf("cls_head.bias"), 15,20, 24,4, 1,1,1);
|
||||
conv2d(m->sz, m->sh, wf("size_head.weight"),wf("size_head.bias"),15,20, 24,2, 1,1,1);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// 接口
|
||||
// ============================================================
|
||||
// True once model_init() has loaded the weights; cleared by model_deinit().
static bool g_rdy{false};
|
||||
|
||||
bool model_init(const char* path) {
|
||||
FILE* f=fopen(path,"rb");
|
||||
if(!f){ printf("[MODEL] open fail: %s\n",path); return false; }
|
||||
fread(&gn,sizeof(int),1,f);
|
||||
gw=new WT[gn];
|
||||
for(int i=0;i<gn;++i){ WT& t=gw[i]; int nl; fread(&nl,4,1,f); fread(t.n,1,nl,f); t.n[nl]=0;
|
||||
fread(&t.nd,4,1,f); int tot=1; for(int d=0;d<t.nd;++d){ fread(&t.s[d],4,1,f); tot*=t.s[d]; }
|
||||
for(int d=t.nd;d<4;++d) t.s[d]=1;
|
||||
t.d=new float[tot]; fread(t.d,4,tot,f);
|
||||
}
|
||||
fclose(f);
|
||||
m=new M(); std::memset(m,0,sizeof(M));
|
||||
g_rdy=true; printf("[MODEL] load ok: %d layers\n",gn); return true;
|
||||
}
|
||||
|
||||
int model_detect(const uint8* bgr, int w, int h, DetectBox* boxes, int max, float th) {
|
||||
if(!g_rdy||w!=160||h!=120) return 0;
|
||||
forward(bgr);
|
||||
return decode(boxes, max, th);
|
||||
}
|
||||
|
||||
void model_deinit() {
|
||||
if(gw){ for(int i=0;i<gn;++i) delete[] gw[i].d; delete[] gw; gw=nullptr; }
|
||||
delete m; m=nullptr; g_rdy=false;
|
||||
}
|
||||
|
||||
// Reports whether model_init() has completed successfully and the model has
// not been released since.
bool model_ready()
{
    const bool loaded = g_rdy;
    return loaded;
}
|
||||
|
||||
Reference in New Issue
Block a user