目录
说明
百度网盘AI大赛-表格检测的第2名方案。
该算法包含表格边界框检测、表格分割和表格方向识别三个部分。首先,ppyoloe-plus-x 对边界框进行预测,并对置信度较高的表格边界框(box)进行裁剪。裁剪后的单个表格实例会送入到DBNet中进行语义分割,分割结果通过opencv轮廓处理获得表格关键点(point)。之后,我们根据DBNet计算的关键点在裁剪后的单个表格实例上绘制表格边界。最后,PP-LCNet结合表格边界先验和表格实例图像,对表格的方向进行预测,并根据之前定义的几何轮廓点与语义轮廓点的对应关系,将几何轮廓点映射为语义轮廓点。
本文使用C# OpenCvSharp DNN 实现百度网盘AI大赛-表格检测第2名方案第一部分-表格边界框检测。
效果
模型
Model Properties
-------------------------
date:2024-10-28T13:52:42.181333
description:Ultralytics YOLO11l model trained on coco.yaml
author:Ultralytics
version:8.3.23
task:detect
license:AGPL-3.0 License (https://ultralytics.com/license)
docs:https://docs.ultralytics.com
stride:32
batch:1
imgsz:[928, 928]
names:{0: 'table'}
---------------------------------------------------------------
Inputs
-------------------------
name:images
tensor:Float[1, 3, 928, 928]
---------------------------------------------------------------
Outputs
-------------------------
name:output0
tensor:Float[1, 5, 17661]
---------------------------------------------------------------
项目
代码
frmMain.cs
using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Windows.Forms;
namespace OpenCvSharp_DNN_Demo
{
/// <summary>
/// Main window of the table-detection demo: lets the user pick an image,
/// runs the YOLO detector on it, draws the detected boxes and saves each
/// detected region as a separate JPEG for the next processing stage.
/// </summary>
public partial class frmMain : Form
{
    public frmMain()
    {
        InitializeComponent();
    }

    YoloDet obj_detector;

    // "*.tif" replaces a duplicated "*.tiff" entry in the original filter.
    string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
    string image_path = "";

    Mat image;

    /// <summary>
    /// Opens a file dialog and loads the chosen image into the left preview.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string selected_path;
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;
            selected_path = ofd.FileName;
        }

        pictureBox1.Image = null;
        pictureBox2.Image = null;
        textBox1.Text = "";

        image_path = selected_path;
        pictureBox1.Image = new Bitmap(image_path);

        // Release the native buffer of the previously loaded image before replacing it.
        if (image != null) image.Dispose();
        image = new Mat(image_path);
    }

    /// <summary>
    /// Loads the detection model and shows the bundled sample image.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        string obj_model_path = "model/yolo_obj_det.onnx";
        obj_detector = new YoloDet(obj_model_path);

        image_path = "test_img/real5.jpg";
        pictureBox1.Image = new Bitmap(image_path);
    }

    /// <summary>
    /// Runs detection on the current image, draws every detected box with its
    /// own score, writes each crop to "&lt;index&gt;.jpg" and reports the
    /// inference time.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        if (string.IsNullOrEmpty(image_path))
        {
            return;
        }
        textBox1.Text = "检测中,请稍等……";
        pictureBox2.Image = null;
        // Let the status text repaint before the blocking inference call.
        Application.DoEvents();

        using (Mat src = new Mat(image_path))
        {
            if (src.Empty())
            {
                textBox1.Text = "图片读取失败:" + image_path;
                return;
            }

            // Stopwatch is monotonic and higher-resolution than DateTime.Now.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            List<Bbox> result = obj_detector.infer(src);
            timer.Stop();

            Rect image_bounds = new Rect(0, 0, src.Cols, src.Rows);

            // Draw on a copy so the crops below come from the clean source.
            using (Mat draw_img = src.Clone())
            {
                for (int i = 0; i < result.Count; i++)
                {
                    Rect r = Rect.FromLTRB(result[i].xmin, result[i].ymin, result[i].xmax, result[i].ymax);

                    // Label each box with its own score (the original always used result[0]).
                    Cv2.PutText(draw_img, $"table:{result[i].score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                    Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                    // Crop and save for the next detection stage; skip regions that
                    // fall outside the image or have no area, which cannot be written.
                    Rect roi = r.Intersect(image_bounds);
                    if (roi.Width <= 0 || roi.Height <= 0)
                    {
                        continue;
                    }
                    using (Mat crop_img = new Mat(src, roi))
                    {
                        Cv2.ImWrite(i + ".jpg", crop_img);
                    }
                }

                // The stream is intentionally not disposed: a Bitmap created from a
                // stream needs that stream to stay open for the Bitmap's lifetime.
                pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
            }

            textBox1.Text = "推理耗时:" + timer.Elapsed.TotalMilliseconds + "ms";
        }
    }

    private void pictureBox2_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox2.Image);
    }

    private void pictureBox1_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox1.Image);
    }
}
}
- using OpenCvSharp;
- using System;
- using System.Collections.Generic;
- using System.Drawing;
- using System.Windows.Forms;
-
- namespace OpenCvSharp_DNN_Demo
- {
/// <summary>
/// Main window of the table-detection demo: lets the user pick an image,
/// runs the YOLO detector on it, draws the detected boxes and saves each
/// detected region as a separate JPEG for the next processing stage.
/// </summary>
public partial class frmMain : Form
{
    public frmMain()
    {
        InitializeComponent();
    }

    YoloDet obj_detector;

    // "*.tif" replaces a duplicated "*.tiff" entry in the original filter.
    string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
    string image_path = "";

    Mat image;

    /// <summary>
    /// Opens a file dialog and loads the chosen image into the left preview.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string selected_path;
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;
            selected_path = ofd.FileName;
        }

        pictureBox1.Image = null;
        pictureBox2.Image = null;
        textBox1.Text = "";

        image_path = selected_path;
        pictureBox1.Image = new Bitmap(image_path);

        // Release the native buffer of the previously loaded image before replacing it.
        if (image != null) image.Dispose();
        image = new Mat(image_path);
    }

    /// <summary>
    /// Loads the detection model and shows the bundled sample image.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        string obj_model_path = "model/yolo_obj_det.onnx";
        obj_detector = new YoloDet(obj_model_path);

        image_path = "test_img/real5.jpg";
        pictureBox1.Image = new Bitmap(image_path);
    }

    /// <summary>
    /// Runs detection on the current image, draws every detected box with its
    /// own score, writes each crop to "&lt;index&gt;.jpg" and reports the
    /// inference time.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        if (string.IsNullOrEmpty(image_path))
        {
            return;
        }
        textBox1.Text = "检测中,请稍等……";
        pictureBox2.Image = null;
        // Let the status text repaint before the blocking inference call.
        Application.DoEvents();

        using (Mat src = new Mat(image_path))
        {
            if (src.Empty())
            {
                textBox1.Text = "图片读取失败:" + image_path;
                return;
            }

            // Stopwatch is monotonic and higher-resolution than DateTime.Now.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            List<Bbox> result = obj_detector.infer(src);
            timer.Stop();

            Rect image_bounds = new Rect(0, 0, src.Cols, src.Rows);

            // Draw on a copy so the crops below come from the clean source.
            using (Mat draw_img = src.Clone())
            {
                for (int i = 0; i < result.Count; i++)
                {
                    Rect r = Rect.FromLTRB(result[i].xmin, result[i].ymin, result[i].xmax, result[i].ymax);

                    // Label each box with its own score (the original always used result[0]).
                    Cv2.PutText(draw_img, $"table:{result[i].score:P0}", new OpenCvSharp.Point(r.TopLeft.X, r.TopLeft.Y - 10), HersheyFonts.HersheySimplex, 8, Scalar.Red, 8);
                    Cv2.Rectangle(draw_img, r, Scalar.Red, thickness: 8);

                    // Crop and save for the next detection stage; skip regions that
                    // fall outside the image or have no area, which cannot be written.
                    Rect roi = r.Intersect(image_bounds);
                    if (roi.Width <= 0 || roi.Height <= 0)
                    {
                        continue;
                    }
                    using (Mat crop_img = new Mat(src, roi))
                    {
                        Cv2.ImWrite(i + ".jpg", crop_img);
                    }
                }

                // The stream is intentionally not disposed: a Bitmap created from a
                // stream needs that stream to stay open for the Bitmap's lifetime.
                pictureBox2.Image = new Bitmap(draw_img.ToMemoryStream());
            }

            textBox1.Text = "推理耗时:" + timer.Elapsed.TotalMilliseconds + "ms";
        }
    }

    private void pictureBox2_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox2.Image);
    }

    private void pictureBox1_DoubleClick(object sender, EventArgs e)
    {
        Common.ShowNormalImg(pictureBox1.Image);
    }
}
- }
YoloDet.cs
- using OpenCvSharp;
- using OpenCvSharp.Dnn;
- using System;
- using System.Collections.Generic;
- using System.Linq;
-
- namespace OpenCvSharp_DNN_Demo
- {
/// <summary>
/// Single-class YOLO detector run through the OpenCV DNN module.
/// The output tensor is read as [1, C, N] with C >= 5, where the first five
/// channels of each of the N candidates are centre-x, centre-y, width,
/// height and score, expressed in the padded network-input coordinates.
/// </summary>
internal class YoloDet
{
    // Values per candidate that the decoder reads: cx, cy, w, h, score.
    const int MinChannels = 5;

    Net model;
    int[] resize_shape = new int[2] { 928, 928 };

    /// <summary>Loads the ONNX model from <paramref name="model_path"/>.</summary>
    public YoloDet(string model_path)
    {
        model = CvDnn.ReadNetFromOnnx(model_path);
    }

    /// <summary>
    /// Detects tables in <paramref name="srcimg"/>.
    /// </summary>
    /// <param name="srcimg">Source image; it is not modified.</param>
    /// <param name="score">Minimum score for a candidate to be kept (also passed to NMS as its score threshold).</param>
    /// <param name="nms_threshold">Overlap threshold passed to non-maximum suppression.</param>
    /// <returns>Boxes in source-image pixel coordinates, clamped to the image; empty when nothing is found.</returns>
    /// <exception cref="ArgumentException"><paramref name="srcimg"/> is null or empty.</exception>
    /// <exception cref="InvalidOperationException">The model output has fewer than five channels.</exception>
    public unsafe List<Bbox> infer(Mat srcimg, float score = 0.4f, float nms_threshold = 0.4f)
    {
        if (srcimg == null || srcimg.Empty())
        {
            throw new ArgumentException("Source image is null or empty.", nameof(srcimg));
        }

        int ori_h = srcimg.Rows;
        int ori_w = srcimg.Cols;

        List<Rect> boxes = new List<Rect>();
        List<float> scores = new List<float>();

        Mat[] outs = new Mat[1] { new Mat() };
        Mat img = null;
        try
        {
            // img_preprocess: resize with padding, scale to [0, 1], build the blob.
            int new_w = 0;
            int new_h = 0;
            int left = 0;
            int top = 0;
            img = Common.ResizePad(srcimg, resize_shape[0], ref new_w, ref new_h, ref left, ref top);

            img.ConvertTo(img, MatType.CV_32FC3, 1.0 / 255.0);
            using (Mat blob = CvDnn.BlobFromImage(img))
            {
                model.SetInput(blob);

                // Run the network and fetch the output tensor.
                string[] outBlobNames = model.GetUnconnectedOutLayersNames().ToArray();
                model.Forward(outs, outBlobNames);
            }

            // img_postprocess: decode candidates back into source-image coordinates.
            Mat output = outs[0];
            int channels = output.Size(1);
            int candidates = output.Size(2);
            if (channels < MinChannels)
            {
                throw new InvalidOperationException("Unexpected model output: expected at least 5 channels, got " + channels + ".");
            }

            float x_factor = (float)ori_w / new_w;
            float y_factor = (float)ori_h / new_h;

            // The tensor is channel-major, so channel c of candidate i lives at
            // data[c * candidates + i]. Reading it directly avoids materialising a
            // transposed copy. Like the original, this relies on the output buffer
            // being contiguous.
            float* data = (float*)output.Data.ToPointer();
            float* cx_row = data;
            float* cy_row = data + candidates;
            float* w_row = data + 2 * candidates;
            float* h_row = data + 3 * candidates;
            float* score_row = data + 4 * candidates;

            for (int i = 0; i < candidates; i++)
            {
                float candidate_score = score_row[i];
                if (candidate_score < score)
                {
                    continue;
                }

                float x = cx_row[i];
                float y = cy_row[i];
                float w = w_row[i];
                float h = h_row[i];

                // Remove the padding offset, rescale, then clamp each edge on its own
                // so that clamping the left/top edge does not shift the right/bottom one.
                int xmin = ClampToRange((int)((x - w / 2 - left) * x_factor), ori_w - 1);
                int ymin = ClampToRange((int)((y - h / 2 - top) * y_factor), ori_h - 1);
                int xmax = ClampToRange((int)((x + w / 2 - left) * x_factor), ori_w - 1);
                int ymax = ClampToRange((int)((y + h / 2 - top) * y_factor), ori_h - 1);

                // Boxes with no area cannot be cropped or drawn meaningfully.
                if (xmax <= xmin || ymax <= ymin)
                {
                    continue;
                }

                boxes.Add(new Rect(xmin, ymin, xmax - xmin, ymax - ymin));
                scores.Add(candidate_score);
            }
        }
        finally
        {
            // Guard against a helper that hands back the caller's own Mat.
            if (img != null && !ReferenceEquals(img, srcimg))
            {
                img.Dispose();
            }
            foreach (Mat o in outs)
            {
                o.Dispose();
            }
        }

        List<Bbox> bboxes = new List<Bbox>();
        if (boxes.Count == 0)
        {
            return bboxes;
        }

        int[] indices;
        CvDnn.NMSBoxes(boxes, scores, score, nms_threshold, out indices);
        foreach (int ind in indices)
        {
            Rect b = boxes[ind];
            bboxes.Add(new Bbox(b.X, b.Y, b.X + b.Width, b.Y + b.Height, scores[ind]));
        }
        return bboxes;
    }

    // Clamps value into [0, max].
    static int ClampToRange(int value, int max)
    {
        return Math.Min(Math.Max(value, 0), max);
    }
}
- }
-
- /*
-
- Model Properties
- -------------------------
- date:2024-10-28T13:52:42.181333
- description:Ultralytics YOLO11l model trained on coco.yaml
- author:Ultralytics
- version:8.3.23
- task:detect
- license:AGPL-3.0 License (https://ultralytics.com/license)
- docs:https://docs.ultralytics.com
- stride:32
- batch:1
- imgsz:[928, 928]
- names:{0: 'table'}
- ---------------------------------------------------------------
-
- Inputs
- -------------------------
- name:images
- tensor:Float[1, 3, 928, 928]
- ---------------------------------------------------------------
-
- Outputs
- -------------------------
- name:output0
- tensor:Float[1, 5, 17661]
- ---------------------------------------------------------------
-
- */
参考
- https://github.com/hpc203/TableDetection
-
- https://aistudio.baidu.com/projectdetail/5398861?searchKeyword=%E8%A1%A8%E6%A0%BC%E6%A3%80%E6%B5%8B%E5%A4%A7%E8%B5%9B&searchTab=ALL
下载
其他


评论记录:
回复评论: