首页 最新 热门 推荐

  • 首页
  • 最新
  • 热门
  • 推荐

C# Open Vocabulary Object Detection 部署开放域目标检测

  • 25-02-19 03:00
  • 2463
  • 10421
blog.csdn.net

目录

介绍

效果

模型信息

owlvit-image.onnx

owlvit-post.onnx

owlvit-text.onnx

项目

代码

Form1.cs

OWLVIT.cs 

下载 


C# Open Vocabulary Object Detection 部署开放域目标检测

介绍

训练源码地址:https://github.com/google-research/scenic/tree/main/scenic/projects/owl_vit

效果

模型信息

owlvit-image.onnx

Inputs
-------------------------
name:pixel_values
tensor:Float[1, 3, 768, 768]
---------------------------------------------------------------

Outputs
-------------------------
name:image_embeds
tensor:Float[1, 24, 24, 768]
name:pred_boxes
tensor:Float[1, 576, 4]
---------------------------------------------------------------

owlvit-post.onnx

Inputs
-------------------------
name:image_embeds
tensor:Float[1, 24, 24, 768]
name:/owlvit/Div_output_0
tensor:Float[1, 512]
name:input_ids
tensor:Int64[1, 16]
---------------------------------------------------------------

Outputs
-------------------------
name:logits
tensor:Float[-1, 576, 1]
---------------------------------------------------------------

owlvit-text.onnx

Inputs
-------------------------
name:input_ids
tensor:Int64[1, 16]
name:attention_mask
tensor:Int64[1, 16]
---------------------------------------------------------------

Outputs
-------------------------
name:text_embeds
tensor:Float[1, 1, 512]
---------------------------------------------------------------

项目

代码

Form1.cs

using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace Onnx_Demo
{
    /// <summary>
    /// Demo window for open-vocabulary object detection. The user picks an image,
    /// enters text prompts separated by ';', and the detections are drawn on a copy
    /// of the image and listed in the info box.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Detector built from the image, text and post-processing ONNX models plus the tokenizer vocabulary.
        OWLVIT owlvit = new OWLVIT("model/owlvit-image.onnx", "model/owlvit-text.onnx", "model/owlvit-post.onnx", "model/vocab.txt");

        // Path of the currently loaded image; stays empty until an image has been loaded successfully.
        string image_path = "";

        // Open-file dialog filter; covers both the ".tif" and ".tiff" spellings.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

        // Reused buffer for the textual detection summary.
        StringBuilder sb = new StringBuilder();

        // Currently loaded source image and the annotated copy produced by the last detection.
        Mat image;
        Mat result_image;

        /// <summary>
        /// Replaces the image shown by a picture box and disposes the image it showed before,
        /// so GDI+ handles are released promptly instead of waiting for finalization.
        /// </summary>
        /// <param name="box">Picture box to update.</param>
        /// <param name="replacement">New image, or null to clear the box.</param>
        private static void ReplaceImage(PictureBox box, System.Drawing.Image replacement)
        {
            System.Drawing.Image previous = box.Image;
            box.Image = replacement;
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Loads an image from disk, shows it in pictureBox2 and runs the image encoder on it.
        /// </summary>
        /// <param name="path">Path of the image file.</param>
        /// <returns>
        /// true when the image was loaded and encoded; false when the file does not exist
        /// or could not be decoded (the previously loaded image is kept in that case).
        /// </returns>
        private bool LoadImage(string path)
        {
            if (String.IsNullOrEmpty(path) || !System.IO.File.Exists(path))
            {
                return false;
            }

            Mat loaded = new Mat(path);
            if (loaded.Empty())
            {
                loaded.Dispose();
                return false;
            }

            if (image != null)
            {
                image.Dispose();
            }
            image = loaded;
            image_path = path;
            ReplaceImage(pictureBox2, new Bitmap(path));

            // detect() works on the image features stored inside the detector by
            // encode_image(), so they must be refreshed every time the image changes.
            owlvit.encode_image(image);
            return true;
        }

        /// <summary>
        /// Lets the user choose an image file, then loads and encodes it.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK) return;

                ReplaceImage(pictureBox1, null);
                txtInfo.Text = "";

                if (!LoadImage(ofd.FileName))
                {
                    txtInfo.Text = "图片加载失败:" + ofd.FileName;
                }
            }
        }

        /// <summary>
        /// Runs detection for the prompts typed into txt_input_text and shows the annotated result.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (image_path == "" || image == null || image.Empty())
            {
                return;
            }

            if (String.IsNullOrEmpty(txt_input_text.Text))
            {
                return;
            }

            // Prompts are separated by ';'. Surrounding whitespace is dropped so that
            // "cat; dog" yields "cat" and "dog", and blank entries are ignored.
            List<string> texts = txt_input_text.Text
                .Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(t => t.Trim())
                .Where(t => t.Length > 0)
                .ToList();
            if (texts.Count == 0)
            {
                return;
            }

            ReplaceImage(pictureBox1, null);
            txtInfo.Text = "检测中,请稍等……";
            button3.Enabled = false;
            // Let the UI repaint the status text before the blocking inference starts.
            Application.DoEvents();

            try
            {
                owlvit.encode_texts(texts);

                List<BoxInfo> objects = owlvit.detect(image, texts);

                if (result_image != null)
                {
                    result_image.Dispose();
                }
                result_image = image.Clone();

                sb.Clear();
                for (int i = 0; i < objects.Count; i++)
                {
                    string label = objects[i].text + " " + objects[i].prob.ToString("F2");
                    Cv2.Rectangle(result_image, objects[i].box, new Scalar(0, 0, 255), 2);
                    Cv2.PutText(result_image, label, new OpenCvSharp.Point(objects[i].box.X, objects[i].box.Y), HersheyFonts.HersheySimplex, 1, new Scalar(0, 0, 255), 2);
                    sb.AppendLine(label);
                }

                // The stream is intentionally left open: a Bitmap created from a stream
                // needs that stream to stay alive for as long as the Bitmap is in use.
                pictureBox1.Image = new Bitmap(result_image.ToMemoryStream());
                txtInfo.Text = sb.ToString();
            }
            finally
            {
                // Re-enable the button even when inference throws, so the UI is not left stuck.
                button3.Enabled = true;
            }
        }

        /// <summary>
        /// Loads and encodes the bundled sample image at startup, if it is present.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // When the sample file is missing, image_path stays empty and the
            // detect button simply does nothing until the user picks an image.
            LoadImage("test_img/2.jpg");
        }
    }
}

  using OpenCvSharp;
  using System;
  using System.Collections.Generic;
  using System.Drawing;
  using System.Linq;
  using System.Text;
  using System.Windows.Forms;

  namespace Onnx_Demo
  {
      /// <summary>
      /// Demo window for open-vocabulary object detection. The user picks an image,
      /// enters text prompts separated by ';', and the detections are drawn on a copy
      /// of the image and listed in the info box.
      /// </summary>
      public partial class Form1 : Form
      {
          public Form1()
          {
              InitializeComponent();
          }

          // Detector built from the image, text and post-processing ONNX models plus the tokenizer vocabulary.
          OWLVIT owlvit = new OWLVIT("model/owlvit-image.onnx", "model/owlvit-text.onnx", "model/owlvit-post.onnx", "model/vocab.txt");

          // Path of the currently loaded image; stays empty until an image has been loaded successfully.
          string image_path = "";

          // Open-file dialog filter; covers both the ".tif" and ".tiff" spellings.
          string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

          // Reused buffer for the textual detection summary.
          StringBuilder sb = new StringBuilder();

          // Currently loaded source image and the annotated copy produced by the last detection.
          Mat image;
          Mat result_image;

          /// <summary>
          /// Replaces the image shown by a picture box and disposes the image it showed before,
          /// so GDI+ handles are released promptly instead of waiting for finalization.
          /// </summary>
          /// <param name="box">Picture box to update.</param>
          /// <param name="replacement">New image, or null to clear the box.</param>
          private static void ReplaceImage(PictureBox box, System.Drawing.Image replacement)
          {
              System.Drawing.Image previous = box.Image;
              box.Image = replacement;
              if (previous != null)
              {
                  previous.Dispose();
              }
          }

          /// <summary>
          /// Loads an image from disk, shows it in pictureBox2 and runs the image encoder on it.
          /// </summary>
          /// <param name="path">Path of the image file.</param>
          /// <returns>
          /// true when the image was loaded and encoded; false when the file does not exist
          /// or could not be decoded (the previously loaded image is kept in that case).
          /// </returns>
          private bool LoadImage(string path)
          {
              if (String.IsNullOrEmpty(path) || !System.IO.File.Exists(path))
              {
                  return false;
              }

              Mat loaded = new Mat(path);
              if (loaded.Empty())
              {
                  loaded.Dispose();
                  return false;
              }

              if (image != null)
              {
                  image.Dispose();
              }
              image = loaded;
              image_path = path;
              ReplaceImage(pictureBox2, new Bitmap(path));

              // detect() works on the image features stored inside the detector by
              // encode_image(), so they must be refreshed every time the image changes.
              owlvit.encode_image(image);
              return true;
          }

          /// <summary>
          /// Lets the user choose an image file, then loads and encodes it.
          /// </summary>
          private void button2_Click(object sender, EventArgs e)
          {
              using (OpenFileDialog ofd = new OpenFileDialog())
              {
                  ofd.Filter = fileFilter;
                  if (ofd.ShowDialog() != DialogResult.OK) return;

                  ReplaceImage(pictureBox1, null);
                  txtInfo.Text = "";

                  if (!LoadImage(ofd.FileName))
                  {
                      txtInfo.Text = "图片加载失败:" + ofd.FileName;
                  }
              }
          }

          /// <summary>
          /// Runs detection for the prompts typed into txt_input_text and shows the annotated result.
          /// </summary>
          private void button3_Click(object sender, EventArgs e)
          {
              if (image_path == "" || image == null || image.Empty())
              {
                  return;
              }

              if (String.IsNullOrEmpty(txt_input_text.Text))
              {
                  return;
              }

              // Prompts are separated by ';'. Surrounding whitespace is dropped so that
              // "cat; dog" yields "cat" and "dog", and blank entries are ignored.
              List<string> texts = txt_input_text.Text
                  .Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries)
                  .Select(t => t.Trim())
                  .Where(t => t.Length > 0)
                  .ToList();
              if (texts.Count == 0)
              {
                  return;
              }

              ReplaceImage(pictureBox1, null);
              txtInfo.Text = "检测中,请稍等……";
              button3.Enabled = false;
              // Let the UI repaint the status text before the blocking inference starts.
              Application.DoEvents();

              try
              {
                  owlvit.encode_texts(texts);

                  List<BoxInfo> objects = owlvit.detect(image, texts);

                  if (result_image != null)
                  {
                      result_image.Dispose();
                  }
                  result_image = image.Clone();

                  sb.Clear();
                  for (int i = 0; i < objects.Count; i++)
                  {
                      string label = objects[i].text + " " + objects[i].prob.ToString("F2");
                      Cv2.Rectangle(result_image, objects[i].box, new Scalar(0, 0, 255), 2);
                      Cv2.PutText(result_image, label, new OpenCvSharp.Point(objects[i].box.X, objects[i].box.Y), HersheyFonts.HersheySimplex, 1, new Scalar(0, 0, 255), 2);
                      sb.AppendLine(label);
                  }

                  // The stream is intentionally left open: a Bitmap created from a stream
                  // needs that stream to stay alive for as long as the Bitmap is in use.
                  pictureBox1.Image = new Bitmap(result_image.ToMemoryStream());
                  txtInfo.Text = sb.ToString();
              }
              finally
              {
                  // Re-enable the button even when inference throws, so the UI is not left stuck.
                  button3.Enabled = true;
              }
          }

          /// <summary>
          /// Loads and encodes the bundled sample image at startup, if it is present.
          /// </summary>
          private void Form1_Load(object sender, EventArgs e)
          {
              // When the sample file is missing, image_path stays empty and the
              // detect button simply does nothing until the user picks an image.
              LoadImage("test_img/2.jpg");
          }
      }
  }

OWLVIT.cs 

  using Microsoft.ML.OnnxRuntime;
  using Microsoft.ML.OnnxRuntime.Tensors;
  using OpenCvSharp;
  using OpenCvSharp.Dnn;
  using System;
  using System.Collections.Generic;
  using System.Linq;
  using System.Runtime.InteropServices;

  namespace Onnx_Demo
  {
      /// <summary>
      /// Text-prompted object detector built from three ONNX models:
      /// an image encoder (run through OpenCV DNN), a text encoder and a
      /// post-processing model (both run through ONNX Runtime).
      /// Typical use: <see cref="encode_image"/> once per image,
      /// <see cref="encode_texts"/> once per prompt set, then <see cref="detect"/>.
      /// </summary>
      public class OWLVIT : IDisposable
      {
          // Minimum sigmoid score for a predicted box to be kept.
          float bbox_threshold = 0.02f;

          // Input resolution of the image encoder.
          int inpWidth = 768;
          int inpHeight = 768;

          // Per-channel normalisation constants, applied to the channels after BGR->RGB conversion.
          float[] mean = new float[] { 0.48145466f, 0.4578275f, 0.40821073f };
          float[] std = new float[] { 0.26862954f, 0.26130258f, 0.27577711f };

          // Image encoder (OpenCV DNN).
          Net net;

          // Text encoder session and its options.
          SessionOptions options;
          InferenceSession onnx_session;

          TokenizerBase tokenizer;

          // Post-processing session and its options.
          SessionOptions options_transformer;
          InferenceSession onnx_session_transformer;

          // State produced by encode_image / encode_texts and consumed by detect.
          float[] image_features;
          List<long[]> input_ids = new List<long[]>();
          List<float[]> text_features = new List<float[]>();
          List<Rect2f> pred_boxes = new List<Rect2f>();

          // Sizes matching the model inputs/outputs.
          int len_image_feature = 24 * 24 * 768;
          int cnt_pred_boxes = 576;
          int len_text_token = 16;
          int len_text_feature = 512;
          int[] image_features_shape = { 1, 24, 24, 768 };
          int[] text_features_shape = { 1, 512 };

          // Not used inside this class; kept because they are part of the public surface.
          public int imgnum = 0;
          public List<string> imglist = new List<string>();

          bool disposed;

          /// <summary>
          /// Loads the three models and the tokenizer vocabulary.
          /// </summary>
          /// <param name="image_modelpath">Path of the image-encoder ONNX model.</param>
          /// <param name="text_modelpath">Path of the text-encoder ONNX model.</param>
          /// <param name="decoder_model_path">Path of the post-processing ONNX model.</param>
          /// <param name="vocab_path">Path of the tokenizer vocabulary file.</param>
          public OWLVIT(string image_modelpath, string text_modelpath, string decoder_model_path, string vocab_path)
          {
              net = CvDnn.ReadNetFromOnnx(image_modelpath);

              options = new SessionOptions();
              options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
              options.AppendExecutionProvider_CPU(0);
              onnx_session = new InferenceSession(text_modelpath, options);

              options_transformer = new SessionOptions();
              options_transformer.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
              options_transformer.AppendExecutionProvider_CPU(0);
              // Use the options object configured for this session (the text session's
              // options were passed here before, leaving options_transformer unused).
              onnx_session_transformer = new InferenceSession(decoder_model_path, options_transformer);

              load_tokenizer(vocab_path);
          }

          /// <summary>Creates the tokenizer and loads its vocabulary from <paramref name="vocab_path"/>.</summary>
          void load_tokenizer(string vocab_path)
          {
              tokenizer = new TokenizerClip();
              tokenizer.load_tokenize(vocab_path);
          }

          /// <summary>
          /// Converts <paramref name="src"/> from BGR to RGB and normalises every channel to
          /// (value / 255 - mean) / std as 32-bit floats. The conversion is done in place.
          /// </summary>
          /// <param name="src">8-bit BGR image; overwritten with the normalised float image.</param>
          /// <returns>The same <paramref name="src"/> instance.</returns>
          Mat normalize_(Mat src)
          {
              Cv2.CvtColor(src, src, ColorConversionCodes.BGR2RGB);
              Mat[] channels = src.Split();
              try
              {
                  for (int i = 0; i < channels.Length; ++i)
                  {
                      // ConvertTo computes value * alpha + beta, so alpha = 1 / (255 * std)
                      // and beta = -mean / std give (value / 255 - mean) / std.
                      channels[i].ConvertTo(channels[i], MatType.CV_32FC1, 1.0 / (255.0 * std[i]), (0.0 - mean[i]) / std[i]);
                  }
                  Cv2.Merge(channels, src);
              }
              finally
              {
                  foreach (Mat channel in channels)
                  {
                      channel.Dispose();
                  }
              }
              return src;
          }

          /// <summary>Logistic function 1 / (1 + e^-x).</summary>
          float sigmoid(float x)
          {
              return (float)(1.0f / (1.0f + Math.Exp(-x)));
          }

          /// <summary>
          /// Runs the image encoder on <paramref name="srcimg"/> and stores the image features
          /// and the predicted boxes (in encoder-input pixel coordinates) for later
          /// <see cref="detect"/> calls. The source image is not modified.
          /// </summary>
          /// <param name="srcimg">BGR image to encode.</param>
          /// <exception cref="ArgumentNullException"><paramref name="srcimg"/> is null.</exception>
          /// <exception cref="ArgumentException"><paramref name="srcimg"/> is empty.</exception>
          /// <exception cref="InvalidOperationException">The model outputs are smaller than expected.</exception>
          public void encode_image(Mat srcimg)
          {
              if (srcimg == null)
              {
                  throw new ArgumentNullException("srcimg");
              }
              if (srcimg.Empty())
              {
                  throw new ArgumentException("Image is empty.", "srcimg");
              }

              using (Mat resized = new Mat())
              {
                  Cv2.Resize(srcimg, resized, new Size(inpWidth, inpHeight));
                  normalize_(resized);

                  using (Mat blob = CvDnn.BlobFromImage(resized))
                  {
                      net.SetInput(blob);

                      // Run inference and read both outputs: [0] image features, [1] boxes.
                      Mat[] outs = new Mat[2] { new Mat(), new Mat() };
                      try
                      {
                          string[] outBlobNames = net.GetUnconnectedOutLayersNames().ToArray();
                          net.Forward(outs, outBlobNames);

                          int box_values = cnt_pred_boxes * 4;
                          // Guard the raw copies below against reading past the output buffers.
                          if (outs[0].Total() < len_image_feature || outs[1].Total() < box_values)
                          {
                              throw new InvalidOperationException("Image encoder outputs have an unexpected size.");
                          }

                          float[] features = new float[len_image_feature];
                          Marshal.Copy(outs[0].Data, features, 0, len_image_feature);

                          float[] raw_boxes = new float[box_values];
                          Marshal.Copy(outs[1].Data, raw_boxes, 0, box_values);

                          // Each box is (centre x, centre y, width, height) relative to the
                          // encoder input; convert to a top-left based rectangle in pixels.
                          List<Rect2f> decoded = new List<Rect2f>(cnt_pred_boxes);
                          for (int i = 0; i < cnt_pred_boxes; i++)
                          {
                              float xc = raw_boxes[i * 4 + 0] * inpWidth;
                              float yc = raw_boxes[i * 4 + 1] * inpHeight;
                              float w = raw_boxes[i * 4 + 2] * inpWidth;
                              float h = raw_boxes[i * 4 + 3] * inpHeight;
                              decoded.Add(new Rect2f((float)(xc - w * 0.5), (float)(yc - h * 0.5), w, h));
                          }

                          // Publish the new state only after everything succeeded.
                          image_features = features;
                          pred_boxes = decoded;
                      }
                      finally
                      {
                          foreach (Mat m in outs)
                          {
                              m.Dispose();
                          }
                      }
                  }
              }
          }

          /// <summary>
          /// Tokenizes every prompt, runs the text encoder on it and stores the token ids and
          /// text features for later <see cref="detect"/> calls. Previous prompts are discarded.
          /// </summary>
          /// <param name="texts">Prompts to encode; tokens beyond the model's token length are dropped.</param>
          /// <exception cref="ArgumentNullException"><paramref name="texts"/> is null.</exception>
          public void encode_texts(List<string> texts)
          {
              if (texts == null)
              {
                  throw new ArgumentNullException("texts");
              }

              text_features.Clear();
              input_ids.Clear();

              foreach (string text in texts)
              {
                  List<int> tokens = new List<int>();
                  tokenizer.encode_text(text, tokens);

                  // Fixed-length ids and mask; positions past the prompt stay 0 (padding).
                  long[] ids = new long[len_text_token];
                  long[] mask = new long[len_text_token];
                  int used = Math.Min(tokens.Count, len_text_token);
                  for (int j = 0; j < used; j++)
                  {
                      ids[j] = tokens[j];
                      mask[j] = 1;
                  }

                  Tensor<long> ids_tensor = new DenseTensor<long>(ids, new[] { 1, len_text_token });
                  Tensor<long> mask_tensor = new DenseTensor<long>(mask, new[] { 1, len_text_token });

                  List<NamedOnnxValue> inputs = new List<NamedOnnxValue>();
                  inputs.Add(NamedOnnxValue.CreateFromTensor("input_ids", ids_tensor));
                  // The mask tensor must be passed here; passing the ids tensor would feed
                  // token ids to the model as the attention mask.
                  inputs.Add(NamedOnnxValue.CreateFromTensor("attention_mask", mask_tensor));

                  using (IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = onnx_session.Run(inputs))
                  {
                      // Copy the output before the native result buffers are released.
                      float[] features = results.First().AsTensor<float>().ToArray();
                      input_ids.Add(ids);
                      text_features.Add(features);
                  }
              }
          }

          /// <summary>
          /// Runs the post-processing model for one prompt and returns its logits,
          /// one value per predicted box.
          /// </summary>
          /// <param name="input_image_feature">Image features from <see cref="encode_image"/>.</param>
          /// <param name="input_text_feature">Text features of the prompt.</param>
          /// <param name="input_id">Padded token ids of the prompt.</param>
          List<float> decode(float[] input_image_feature, float[] input_text_feature, long[] input_id)
          {
              Tensor<float> image_embeds = new DenseTensor<float>(input_image_feature, image_features_shape);
              Tensor<float> text_embeds = new DenseTensor<float>(input_text_feature, text_features_shape);
              Tensor<long> ids = new DenseTensor<long>(input_id, new[] { 1, len_text_token });

              /*
                  Model inputs:
                  name:image_embeds           tensor:Float[1, 24, 24, 768]
                  name:/owlvit/Div_output_0   tensor:Float[1, 512]
                  name:input_ids              tensor:Int64[1, 16]
              */
              List<NamedOnnxValue> inputs = new List<NamedOnnxValue>();
              inputs.Add(NamedOnnxValue.CreateFromTensor("image_embeds", image_embeds));
              inputs.Add(NamedOnnxValue.CreateFromTensor("/owlvit/Div_output_0", text_embeds));
              inputs.Add(NamedOnnxValue.CreateFromTensor("input_ids", ids));

              using (IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = onnx_session_transformer.Run(inputs))
              {
                  return results.First().AsTensor<float>().ToList();
              }
          }

          /// <summary>
          /// Scores every predicted box against every encoded prompt, keeps boxes whose score
          /// reaches the threshold, maps them back to <paramref name="srcimg"/> coordinates and
          /// applies non-maximum suppression over all prompts together.
          /// </summary>
          /// <param name="srcimg">Image passed to the last <see cref="encode_image"/> call; only its size is used.</param>
          /// <param name="texts">Prompts passed to the last <see cref="encode_texts"/> call; used as labels.</param>
          /// <returns>The detections that survive suppression; empty when nothing was found.</returns>
          /// <exception cref="ArgumentNullException">An argument is null.</exception>
          /// <exception cref="ArgumentException"><paramref name="texts"/> has fewer entries than the encoded prompts.</exception>
          /// <exception cref="InvalidOperationException"><see cref="encode_image"/> has not been called.</exception>
          public List<BoxInfo> detect(Mat srcimg, List<string> texts)
          {
              if (srcimg == null)
              {
                  throw new ArgumentNullException("srcimg");
              }
              if (texts == null)
              {
                  throw new ArgumentNullException("texts");
              }
              if (image_features == null)
              {
                  throw new InvalidOperationException("encode_image must be called before detect.");
              }
              if (texts.Count < input_ids.Count)
              {
                  throw new ArgumentException("texts must contain one label per encoded prompt.", "texts");
              }

              float ratioh = 1.0f * srcimg.Rows / inpHeight;
              float ratiow = 1.0f * srcimg.Cols / inpWidth;

              List<float> confidences = new List<float>();
              List<Rect> boxes = new List<Rect>();
              List<string> className = new List<string>();

              for (int i = 0; i < input_ids.Count; i++)
              {
                  List<float> logits = decode(image_features, text_features[i], input_ids[i]);
                  // Never index past the boxes produced by encode_image.
                  int count = Math.Min(logits.Count, pred_boxes.Count);
                  for (int j = 0; j < count; j++)
                  {
                      float score = sigmoid(logits[j]);
                      if (score < bbox_threshold)
                      {
                          continue;
                      }

                      // Map back to the original image.
                      int xmin = (int)(pred_boxes[j].X * ratiow);
                      int ymin = (int)(pred_boxes[j].Y * ratioh);
                      int xmax = (int)((pred_boxes[j].X + pred_boxes[j].Width) * ratiow);
                      int ymax = (int)((pred_boxes[j].Y + pred_boxes[j].Height) * ratioh);

                      // Clamp to the image bounds.
                      xmin = Math.Max(Math.Min(xmin, srcimg.Cols - 1), 0);
                      ymin = Math.Max(Math.Min(ymin, srcimg.Rows - 1), 0);
                      xmax = Math.Max(Math.Min(xmax, srcimg.Cols - 1), 0);
                      ymax = Math.Max(Math.Min(ymax, srcimg.Rows - 1), 0);

                      boxes.Add(new Rect(xmin, ymin, xmax - xmin, ymax - ymin));
                      confidences.Add(score);
                      className.Add(texts[i]);
                  }
              }

              List<BoxInfo> objects = new List<BoxInfo>();
              if (boxes.Count == 0)
              {
                  return objects;
              }

              float nmsThreshold = 0.5f;
              int[] indices;
              CvDnn.NMSBoxes(boxes, confidences, bbox_threshold, nmsThreshold, out indices);

              // indices holds the positions of the surviving candidates; they must be used
              // to look up the candidate lists, otherwise the suppression has no effect.
              foreach (int keep in indices)
              {
                  BoxInfo temp = new BoxInfo();
                  temp.text = className[keep];
                  temp.prob = confidences[keep];
                  temp.box = boxes[keep];
                  objects.Add(temp);
              }
              return objects;
          }

          /// <summary>
          /// Releases the ONNX Runtime sessions, their options and the OpenCV network.
          /// Safe to call more than once.
          /// </summary>
          public void Dispose()
          {
              if (disposed)
              {
                  return;
              }
              disposed = true;

              onnx_session_transformer.Dispose();
              onnx_session.Dispose();
              options_transformer.Dispose();
              options.Dispose();
              net.Dispose();
          }
      }
  }

下载 

源码下载

天天代码码天天
微信公众号
.NET 人工智能实践
注:本文转载自blog.csdn.net的天天代码码天天的文章"https://lw112190.blog.csdn.net/article/details/135503886"。版权归原作者所有,此博客不拥有其著作权,亦不承担相应法律责任。如有侵权,请联系我们删除。
复制链接
复制链接
相关推荐
发表评论
登录后才能发表评论和回复 注册

/ 登录

评论记录:

未查询到任何数据!
回复评论:

分类栏目

后端 (14832) 前端 (14280) 移动开发 (3760) 编程语言 (3851) Java (3904) Python (3298) 人工智能 (10119) AIGC (2810) 大数据 (3499) 数据库 (3945) 数据结构与算法 (3757) 音视频 (2669) 云原生 (3145) 云平台 (2965) 前沿技术 (2993) 开源 (2160) 小程序 (2860) 运维 (2533) 服务器 (2698) 操作系统 (2325) 硬件开发 (2492) 嵌入式 (2955) 微软技术 (2769) 软件工程 (2056) 测试 (2865) 网络空间安全 (2948) 网络与通信 (2797) 用户体验设计 (2592) 学习和成长 (2593) 搜索 (2744) 开发工具 (7108) 游戏 (2829) HarmonyOS (2935) 区块链 (2782) 数学 (3112) 3C硬件 (2759) 资讯 (2909) Android (4709) iOS (1850) 代码人生 (3043) 阅读 (2841)

热门文章

101
推荐
关于我们 隐私政策 免责声明 联系我们
Copyright © 2020-2025 蚁人论坛 (iYenn.com) All Rights Reserved.
Scroll to Top