diff --git a/LPRNET_part/藏A0DBN8.jpg b/LPRNET_part/ets/1.jpg
similarity index 100%
rename from LPRNET_part/藏A0DBN8.jpg
rename to LPRNET_part/ets/1.jpg
diff --git a/LPRNET_part/吉CF18040.jpg b/LPRNET_part/ets/2.jpg
similarity index 100%
rename from LPRNET_part/吉CF18040.jpg
rename to LPRNET_part/ets/2.jpg
diff --git a/LPRNET_part/ets/6ce2ec7dbed6cf3c8403abe2683c57e5.jpg b/LPRNET_part/ets/6ce2ec7dbed6cf3c8403abe2683c57e5.jpg
new file mode 100644
index 0000000..3f8a8f8
Binary files /dev/null and b/LPRNET_part/ets/6ce2ec7dbed6cf3c8403abe2683c57e5.jpg differ
diff --git a/LPRNET_part/ets/c11304d10bcd47911e458398d1ea445d.jpg b/LPRNET_part/ets/c11304d10bcd47911e458398d1ea445d.jpg
new file mode 100644
index 0000000..570ae94
Binary files /dev/null and b/LPRNET_part/ets/c11304d10bcd47911e458398d1ea445d.jpg differ
diff --git a/LPRNET_part/ets/c6ab0fbcfb2b6fbe15c5b3eb9806a28b.jpg b/LPRNET_part/ets/c6ab0fbcfb2b6fbe15c5b3eb9806a28b.jpg
new file mode 100644
index 0000000..843a03d
Binary files /dev/null and b/LPRNET_part/ets/c6ab0fbcfb2b6fbe15c5b3eb9806a28b.jpg differ
diff --git a/LPRNET_part/lpr_interface.py b/LPRNET_part/lpr_interface.py
index 2b688ba..f3f8692 100644
--- a/LPRNET_part/lpr_interface.py
+++ b/LPRNET_part/lpr_interface.py
@@ -1,3 +1,4 @@
+# Import the required libraries
 import torch
 import torch.nn as nn
 import cv2
@@ -11,6 +12,7 @@ from PIL import Image
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # LPRNet character set definition (must match the set used during training)
+# Contains Chinese province abbreviations, digits, letters, and special characters
 CHARS = ['京', '沪', '津', '渝', '冀', '晋', '蒙', '辽', '吉', '黑',
          '苏', '浙', '皖', '闽', '赣', '鲁', '豫', '鄂', '湘', '粤',
          '桂', '琼', '川', '贵', '云', '藏', '陕', '甘', '青', '宁', '新',
@@ -19,84 +21,115 @@ CHARS = ['京', '沪', '津', '渝', '冀', '晋', '蒙', '辽', '吉', '黑',
          'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
          'W', 'X', 'Y', 'Z', 'I', 'O', '-']
 
+# Build the character-to-index mapping dictionary
 CHARS_DICT = {char: i for i, char in enumerate(CHARS)}
 
-# Simplified LPRNet model definition
+# Simplified LPRNet model definition - basic convolution block
 class small_basic_block(nn.Module):
     def __init__(self, ch_in, ch_out):
         super(small_basic_block, self).__init__()
+        # A small basic block composed of four convolution layers
         self.block = nn.Sequential(
+            # 1x1 convolution to reduce the channel count
             nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
             nn.ReLU(),
+            # 3x1 convolution, aggregating features along the height
             nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
             nn.ReLU(),
+            # 1x3 convolution, aggregating features along the width
             nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
             nn.ReLU(),
+            # 1x1 convolution to restore the channel count
             nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
         )
 
     def forward(self, x):
         return self.block(x)
 
+# LPRNet model definition - license plate recognition network
 class LPRNet(nn.Module):
     def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
         super(LPRNet, self).__init__()
         self.phase = phase
         self.lpr_max_len = lpr_max_len
         self.class_num = class_num
+
+        # Backbone network
         self.backbone = nn.Sequential(
+            # Initial convolution layer
             nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1),  # 0
             nn.BatchNorm2d(num_features=64),
             nn.ReLU(),  # 2
+            # First max-pooling layer
             nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
+            # First basic block
             small_basic_block(ch_in=64, ch_out=128),  # *** 4 ***
             nn.BatchNorm2d(num_features=128),
             nn.ReLU(),  # 6
+            # Second pooling layer
             nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
+            # Second basic block
             small_basic_block(ch_in=64, ch_out=256),  # 8
             nn.BatchNorm2d(num_features=256),
             nn.ReLU(),  # 10
+            # Third basic block
             small_basic_block(ch_in=256, ch_out=256),  # *** 11 ***
             nn.BatchNorm2d(num_features=256),
             nn.ReLU(),  # 13
+            # Third pooling layer
             nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
+            # Dropout layer to reduce overfitting
             nn.Dropout(dropout_rate),
+            # Feature-extraction convolution layer
             nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
             nn.BatchNorm2d(num_features=256),
             nn.ReLU(),  # 18
+            # Second dropout layer
             nn.Dropout(dropout_rate),
+            # Classification convolution layer
             nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1),  # 20
             nn.BatchNorm2d(num_features=class_num),
             nn.ReLU(),  # 22
         )
+
+        # Container layer that fuses the global context information
         self.container = nn.Sequential(
             nn.Conv2d(in_channels=448+self.class_num, out_channels=self.class_num, kernel_size=(1,1), stride=(1,1)),
         )
 
     def forward(self, x):
+        # Collect intermediate features
        keep_features = list()
        for i, layer in enumerate(self.backbone.children()):
            x = layer(x)
+            # Keep the outputs of selected layers
            if i in [2, 6, 13, 22]:  # [2, 4, 8, 11, 22]
                keep_features.append(x)
 
+        # Build the global context
        global_context = list()
        for i, f in enumerate(keep_features):
+            # Average-pool each kept feature map at a scale that matches its resolution
            if i in [0, 1]:
                f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
            if i in [2]:
                f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)
+            # Normalize each feature map by its mean squared value
            f_pow = torch.pow(f, 2)
            f_mean = torch.mean(f_pow)
            f = torch.div(f, f_mean)
            global_context.append(f)
 
+        # Concatenate the global context features
        x = torch.cat(global_context, 1)
+        # Pass the result through the container layer
        x = self.container(x)
+        # Average over the height dimension to obtain the per-position logits
        logits = torch.mean(x, dim=2)
 
        return logits
 
+# LPRNet inference wrapper
 class LPRNetInference:
     def __init__(self, model_path=None, img_size=[94, 24], lpr_max_len=8, dropout_rate=0.5):
         """
@@ -109,6 +142,7 @@ class LPRNetInference:
         """
         self.img_size = img_size
         self.lpr_max_len = lpr_max_len
+        # Detect whether a CUDA device is available
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
         # Set the default model path
@@ -130,6 +164,7 @@ class LPRNetInference:
         else:
             print(f"Warning: model file does not exist or was not specified: {model_path}. Using random weights.")
 
+        # Move the model to the target device and switch to evaluation mode
         self.model.to(self.device)
         self.model.eval()
 
@@ -164,9 +199,11 @@ class LPRNetInference:
             image_array = cv2.resize(image_array, tuple(self.img_size))
 
             # Use the same preprocessing as during training
+            # Normalization: subtract 127.5 and multiply by 0.0078125 to map pixel values from [0, 255] to [-1, 1]
             image_array = image_array.astype('float32')
             image_array -= 127.5
             image_array *= 0.0078125
+            # Reorder the dimensions from HWC to CHW
             image_array = np.transpose(image_array, (2, 0, 1))  # HWC -> CHW
 
             # Convert to a tensor and add the batch dimension
@@ -186,7 +223,7 @@ class LPRNetInference:
             prebs = logits.cpu().detach().numpy()
             preb = prebs[0, :, :]  # take the first batch item [num_classes, sequence_length]
 
-            # Greedy decoding:select the most probable character at each time step
+            # Greedy decoding: select the most probable character at each time step
             preb_label = []
             for j in range(preb.shape[1]):  # iterate over every time step
                 preb_label.append(np.argmax(preb[:, j], axis=0))
@@ -248,7 +285,7 @@ class LPRNetInference:
             print(f"Failed to predict image: {e}")
             return None, 0.0
 
-# Global variable
+# Global variable that stores the model instance
 lpr_model = None
 
 def LPRNinitialize_model():
@@ -295,6 +332,9 @@ def LPRNmodel_predict(image_array):
         return ['待', '识', '别', '0', '0', '0', '0', '0']
 
     try:
+        # Resize the image with OpenCV to a fixed size before prediction
+        image_array = cv2.resize(image_array, (128, 48))
+        print(f"666999 image size: {image_array.shape}")
         # Predict the plate number
         predicted_text, confidence = lpr_model.predict(image_array)
 
diff --git a/main.py b/main.py
index a589b39..6d9b365 100644
--- a/main.py
+++ b/main.py
@@ -11,7 +11,6 @@ from yolopart.detector import LicensePlateYOLO
 # Choose which module to use
 # from LPRNET_part.lpr_interface import LPRNmodel_predict
 # from LPRNET_part.lpr_interface import LPRNinitialize_model
-
 # Use OCR
 # from OCR_part.ocr_interface import LPRNmodel_predict
 # from OCR_part.ocr_interface import LPRNinitialize_model
diff --git a/test_lpr_real_images.py b/test_lpr_real_images.py
index ce32954..b3f859b 100644
--- a/test_lpr_real_images.py
+++ b/test_lpr_real_images.py
@@ -85,6 +85,7 @@ def test_image_loading():
     # Method 2: an approach that supports paths containing Chinese characters
     try:
         img2 = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
+        # img2 = cv2.resize(img2,(128,48))
        print(f"cv2.imdecode result: {img2 is not None}")
        if img2 is not None:
            print(f"Image size: {img2.shape}")