// NOTE(review): this file appears to have been damaged in extraction: every
// span starting at a '<' directly followed by a letter and ending at the next
// '>' seems to have been removed. The header names of the six includes below
// and several function bodies are missing. The code tokens are reproduced
// exactly as found and each damaged place is marked; restore the missing text
// from version control before building.
#include "model.hpp"
#include
#include
#include
#include
#include
#include

// ============================================================
// Memory pre-allocation
// ============================================================
// Fixed-size float buffers, accessed everywhere through the file-level
// pointer `m`. forward() writes stem, b1..b5, sh, cls and sz; the residual
// block code uses dw_out as scratch. b1_tmp and the b*_dw arrays are not
// referenced in the portions of the file that are visible here.
struct M {
    float stem[8*60*80];
    float b1[16*60*80];
    float b1_tmp[16*60*80];
    float b2[28*30*40];
    float b2_dw[16*30*40];
    float b3[40*30*40];
    float b3_dw[28*30*40];
    float b4[56*15*20];
    float b4_dw[40*15*20];
    float b5[64*15*20];
    float b5_dw[56*15*20];
    float sh[24*15*20];
    float cls[4*15*20];
    float sz[2*15*20];
    float dw_out[16*60*80]; // max(block1 skip 16×60×80, other dw)
};
// Starts as nullptr; the code that allocates it is not visible in this view.
static M* m = nullptr;

// ============================================================
// Weight table
// ============================================================
// One named weight entry. `n` is the name wf() matches against and `d` is the
// pointer wf() returns. `nd` and `s` are not used in the visible code
// (presumably dimension count and shape -- TODO confirm against the loader).
struct WT { char n[64]; int nd; int s[4]; float* d; };
static WT* gw = nullptr;  // table of gn entries, allocated in model_init()
static int gn = 0;        // entry count, read from the model file in model_init()

// Looks up a weight by exact name with a linear scan over gw[0..gn).
// Returns the entry's data pointer, or prints a "MISS" line and returns
// nullptr when no entry matches.
// NOTE(review): none of the visible call sites checks for nullptr before
// handing the result to conv2d / batchnorm_f / conv_bn.
static float* wf(const char* name) {
    for (int i = 0; i < gn; ++i)
        if (!std::strcmp(gw[i].n, name)) return gw[i].d;
    printf("[MODEL] MISS %s\n", name);
    return nullptr;
}

// ============================================================
// Basic operators
// ============================================================
// NOTE(review): DAMAGED SPAN. The body of relu_f and the signature and outer
// loops of conv2d are missing; what follows the first `for` header is the
// tail of conv2d's innermost loops. From the surviving tail: taps whose input
// row/column falls outside the image are skipped, products of weight and
// input are accumulated into `s`, and `s` is stored to the output at
// y*Wo+x.
static void relu_f(float* x, int N) {
    for (int i=0;i=H) continue;
    for (int kx=0; kx=W) continue;
    s += ww[ky*K+kx] * ii[iy*W+ix];
    } } }
    oo[y*Wo+x] = s;
    } } } }

// NOTE(review): DAMAGED SPAN. Only the signature and the epsilon constant of
// batchnorm_f survive. The rest of batchnorm_f, all of conv_bn (called from
// forward()) and the head of se_block (signature, locals such as `na`, `Ho`,
// `Wo`, `No`, and the start of the depthwise conv call) are missing. The
// tokens after `for (int c=0;` are the tail of that depthwise conv2d call
// inside se_block.
static void batchnorm_f(float* x, const float* w, const float* b, const float* mean, const float* var, int C, int N) {
    const float eps = 1e-5f;
    for (int c=0; cdw_out, in, wf(na), nullptr, H, W, iC, iC, 3, str, iC);

    // --- remainder of se_block: depthwise BN + ReLU, performed in m->dw_out ---
    // NOTE(review): `wf(na)` and the three `wf((std::snprintf(na,...),na))`
    // arguments all go through the same buffer `na`. C++ does not specify the
    // order in which function arguments are evaluated, so if `wf(na)` is
    // evaluated after any of its siblings it looks up the wrong tensor name.
    // The same pattern recurs in the two batchnorm_f calls below. Resolving
    // each pointer into a named local before the call would remove the
    // dependency on evaluation order.
    std::snprintf(na,128,"%s.dw.1.weight",pfx);
    batchnorm_f(m->dw_out, wf(na),
        wf((std::snprintf(na,128,"%s.dw.1.bias",pfx),na)),
        wf((std::snprintf(na,128,"%s.dw.1.running_mean",pfx),na)),
        wf((std::snprintf(na,128,"%s.dw.1.running_var",pfx),na)),
        iC, iC*No);
    relu_f(m->dw_out, iC*No);

    // pointwise conv + BN + ReLU (to output)
    std::snprintf(na,128,"%s.pw.0.weight",pfx);
    conv2d(o, m->dw_out, wf(na), nullptr, Ho, Wo, iC, oC, 1, 1, 1);
    std::snprintf(na,128,"%s.pw.1.weight",pfx);
    batchnorm_f(o, wf(na),
        wf((std::snprintf(na,128,"%s.pw.1.bias",pfx),na)),
        wf((std::snprintf(na,128,"%s.pw.1.running_mean",pfx),na)),
        wf((std::snprintf(na,128,"%s.pw.1.running_var",pfx),na)),
        oC, oC*No);
    relu_f(o, oC*No);

    // skip (no ReLU)
    float* skip = m->dw_out; // reuse buffer
    if (str==1 && iC==oC) {
        // Identity shortcut: plain copy of the input.
        // NOTE(review): the byte count hard-codes 4 as the size of a float.
        std::memcpy(skip, in, iC*H*W*4);
    } else {
        // Projection shortcut: 1x1 conv with the block's stride, then BN.
        std::snprintf(na,128,"%s.skip.0.weight",pfx);
        conv2d(skip, in, wf(na), nullptr, H, W, iC, oC, 1, str, 1);
        std::snprintf(na,128,"%s.skip.1.weight",pfx);
        batchnorm_f(skip, wf(na),
            wf((std::snprintf(na,128,"%s.skip.1.bias",pfx),na)),
            wf((std::snprintf(na,128,"%s.skip.1.running_mean",pfx),na)),
            wf((std::snprintf(na,128,"%s.skip.1.running_var",pfx),na)),
            oC, oC*No);
    }

    // add skip
    // NOTE(review): DAMAGED SPAN. The accumulation loop, the end of se_block
    // and the head of the box-decoding function (its signature and the
    // definitions of N, OH, OW, ST, RS, `boxes` and the score threshold) are
    // missing. `cls, 4, N);` is the tail of a call that takes m->cls
    // (presumably an activation applied to the class map -- TODO confirm).
    for (int i=0; icls, 4, N);

    // --- box decoding (function head missing) ---
    int cnt=0;
    // NOTE(review): DAMAGED SPAN inside the grid loops. The surviving tokens
    // show a per-cell search for the best class score (bs) and its class (bc).
    for (int gy=0; gycls[c*N+idx];
    if (s>bs) {bs=s; bc=c;} }
    // NOTE(review): DAMAGED SPAN. The threshold test and the neighbour loop
    // headers are missing. What survives skips out-of-bounds neighbours and
    // clears `pk` when a neighbour's score for class bc exceeds bs, so only
    // cells that are not beaten by a neighbour are kept.
    if (bs=OH||nx<0||nx>=OW) continue;
    if (m->cls[bc*N+ny*OW+nx] > bs) pk=false; }
    if (!pk) continue;
    // Emit one box: centre is the cell centre scaled by ST, size is the
    // predicted value from m->sz multiplied by the per-class entry of RS.
    float pw=m->sz[0*N+idx], ph=m->sz[1*N+idx];
    boxes[cnt].cls=bc;
    boxes[cnt].conf=bs;
    boxes[cnt].cx=((float)gx+0.5f)*ST;
    boxes[cnt].cy=((float)gy+0.5f)*ST;
    boxes[cnt].w=pw*RS[bc][0];
    boxes[cnt].h=ph*RS[bc][1];
    cnt++;
    }
    // Number of boxes written.
    return cnt;
}

// ============================================================
// Forward inference
// ============================================================
// Runs the network on one image and leaves the head outputs in m->cls and
// m->sz. `bgr` is read as 120 rows of 160 pixels with 3 interleaved bytes per
// pixel. The `uint8` type is not declared in the visible code (presumably it
// comes from model.hpp).
// NOTE(review): `m` is dereferenced without a null check, and the input
// buffer is allocated and freed with raw new[]/delete[] on every call.
static void forward(const uint8* bgr) {
    // Preprocess: BGR -> RGB, float in [0,1], CHW layout
    // (heap-allocated to avoid overflowing the stack)
    float* in = new float[3*120*160];
    for (int c=0; c<3; ++c) {
        int sc=2-c;               // output plane c takes source byte 2-c (channel swap)
        float* ch=in+c*120*160;   // start of output plane c
        for (int y=0; y<120; ++y) {
            const uint8* row=bgr+y*160*3;
            for (int x=0; x<160; ++x)
                ch[y*160+x]=(float)row[x*3+sc]/255.0f;
        }
    }

    // stem
    conv_bn(m->stem, in, 120,160,
        wf("stem.0.weight"),nullptr,
        wf("stem.1.weight"),wf("stem.1.bias"),
        wf("stem.1.running_mean"),wf("stem.1.running_var"),
        3,8,3,2,1,true);

    // Five residual blocks; each call passes output, input, input/output
    // channel counts, input height/width, stride and the weight-name prefix.
    se_block(m->b1, m->stem, 8,16, 60,80, 1,"block1");
    se_block(m->b2, m->b1, 16,28, 60,80, 2,"block2");
    se_block(m->b3, m->b2, 28,40, 30,40, 1,"block3");
    se_block(m->b4, m->b3, 40,56, 30,40, 2,"block4");
    se_block(m->b5, m->b4, 56,64,
        15,20, 1,"block5");

    // shared
    conv_bn(m->sh, m->b5, 15,20,
        wf("shared.0.weight"),nullptr,
        wf("shared.1.weight"),wf("shared.1.bias"),
        wf("shared.1.running_mean"),wf("shared.1.running_var"),
        64,24,1,1,1,true);

    // heads (1x1 conv, no BN/ReLU)
    conv2d(m->cls, m->sh, wf("cls_head.weight"), wf("cls_head.bias"), 15,20, 24,4, 1,1,1);
    conv2d(m->sz, m->sh, wf("size_head.weight"),wf("size_head.bias"),15,20, 24,2, 1,1,1);

    delete[] in;
}

// ============================================================
// Interface
// ============================================================
// Initialised to false; no reads or writes of it are visible in this view.
static bool g_rdy = false;

// Opens the model file at `path` in binary mode and begins loading the weight
// table: the first int in the file is read into gn and gn table entries are
// allocated. Prints a message and returns false if the file cannot be opened.
// NOTE(review): the result of fread is not checked and gn is used as an
// allocation size without validation.
// NOTE(review): DAMAGED / TRUNCATED. Everything after the start of the entry
// loop is missing from this view.
bool model_init(const char* path) {
    FILE* f=fopen(path,"rb");
    if(!f){ printf("[MODEL] open fail: %s\n",path); return false; }
    fread(&gn,sizeof(int),1,f);
    gw=new WT[gn];
    for(int i=0;i