目录
效果
项目
代码
using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OpenVINO.OCR
{
public partial class frmMain : Form
{
/// <summary>
/// Creates the main form: builds the designer-generated controls and then asks
/// NLog's RichTextBox target to re-initialize itself against this form.
/// </summary>
public frmMain()
{
InitializeComponent();
// NOTE(review): presumably this re-attaches RichTextBox log targets that were configured
// before the form's controls existed — confirm against the NLog.Windows.Forms documentation.
NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
}
// Application start-up directory; assigned in frmMain_Load.
String startupPath;
// File-type filter used by the "choose spreadsheet" dialog.
private string excelFileFilter = "表格|*.xlsx;*.xls;";
private Logger log = NLog.LogManager.GetCurrentClassLogger();
// Cancellation source of the current download/recognition run; created in button1_Click.
CancellationTokenSource cts;
// Records parsed from the spreadsheet that still have to be downloaded.
ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
// Records whose images have been downloaded and are waiting for OCR.
ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();
// Snapshots of the two check boxes, taken when a run starts.
bool saveImg = false;
bool saveOcr = false;
int ocrNum = 0;        // number of records whose OCR pass has finished
int totalCount = 0;    // number of records in the current run (set from ltImgInfo.Count())
int downloadCount = 0; // number of records whose images have been downloaded
int vioIDCount = 0;    // number of records flagged as violating a rule
/// <summary>
/// Form load handler: enforces the trial expiry date, builds the shared PaddleOCR
/// pipeline (<c>Program.paddleOcr</c>) from the model folders under the start-up
/// directory, sets the global HTTP connection defaults and loads the two rule
/// lists from the <c>rules</c> folder, logging their contents.
/// </summary>
/// <param name="sender">Source of the Load event.</param>
/// <param name="e">Event data (unused).</param>
private void frmMain_Load(object sender, EventArgs e)
{
    // Trial build: refuse to run after the hard-coded expiry date.
    DateTime limitTime = new DateTime(2024, 08, 30, 00, 00, 00);
    if (DateTime.Now > limitTime)
    {
        MessageBox.Show("此软件试用期已过");
        Application.Exit();
        // Application.Exit() only requests shutdown and then returns to the caller, so
        // leave here instead of loading the OCR models for a form that is closing.
        return;
    }

    // Initialisation: the model folders live under <startup>\inference.
    startupPath = System.Windows.Forms.Application.StartupPath;
    string detectionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_det_infer";
    string classificationModelDir = startupPath + "\\inference\\ch_ppocr_mobile_v2.0_cls_infer";
    string recognitionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_rec_infer";
    string labelFilePath = startupPath + "\\inference\\ppocr_keys.txt";
    FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

    PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
    paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
    paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
    paddleOcrOptions.RecognitionStaticWidth = 512;

    Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
    Program.paddleOcr.Detector.UnclipRatio = 1.5f;
    Program.paddleOcr.AllowRotateDetection = true;     // allow detection of text drawn at an angle
    Program.paddleOcr.Enable180Classification = false; // classifier for text rotated by more than 90 degrees stays off

    ServicePointManager.Expect100Continue = false;
    ServicePointManager.DefaultConnectionLimit = 512;

    // Load the forbidden-word list (one entry per line); a missing file leaves the list empty.
    Common.ltRuleContains.Clear();
    Common.ltRuleTel.Clear();
    string ruleContainsPath = "rules\\rule_contains.txt";
    if (File.Exists(ruleContainsPath))
    {
        Common.ltRuleContains = File.ReadAllLines(ruleContainsPath).ToList();
    }
    StringBuilder sb = new StringBuilder();
    foreach (var item in Common.ltRuleContains)
    {
        sb.AppendLine(item);
    }
    log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + sb.ToString());

    // Load the phone-number prefixes, lower-cased as they are read.
    string ruleTelPath = "rules\\rule_tel.txt";
    if (File.Exists(ruleTelPath))
    {
        foreach (var item in File.ReadAllLines(ruleTelPath))
        {
            Common.ltRuleTel.Add(item.ToLower());
        }
    }
    sb.Clear();
    foreach (var item in Common.ltRuleTel)
    {
        sb.AppendLine(item);
    }
    log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + sb.ToString());
}
/// <summary>
/// "Choose spreadsheet" handler: lets the user pick an Excel file, reads the first
/// worksheet starting at the second row and rebuilds <c>ltImgInfo</c> with one
/// <c>ImgInfo</c> per exported row.
/// </summary>
/// <remarks>
/// Column 0 is stored as the record id and column 1 as its title. A non-empty cell in
/// columns 2-6 is taken as a single image URL; from column 7 onwards the cell text is
/// handed to <c>Common.GetScrUrl</c>, which may return several URLs.
/// Any exception is logged and shown in a message box.
/// </remarks>
/// <param name="sender">Source of the Click event.</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    try
    {
        string excelPath;
        // The dialog is a disposable component; release it as soon as the path is known.
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = excelFileFilter;
            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }
            excelPath = ofd.FileName;
        }

        log.Info("解析中……");
        Application.DoEvents();
        Stopwatch sw = Stopwatch.StartNew();

        Workbook workbook = new Workbook(excelPath);
        Cells cells = workbook.Worksheets[0].Cells;
        int columnCount = cells.MaxColumn + 1;
        // Export starts at row index 1, so the first (title) row is not part of the table.
        System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, columnCount);

        ltImgInfo = new ConcurrentQueue<ImgInfo>();
        int imgCount = 0;
        foreach (DataRow row in dataTable1.Rows)
        {
            ImgInfo temp = new ImgInfo();
            temp.id = row[0].ToString();
            temp.title = row[1].ToString();

            List<MatInfo> list = new List<MatInfo>();
            for (int i = 2; i < columnCount; i++)
            {
                string tempStr = row[i].ToString();
                if (string.IsNullOrEmpty(tempStr))
                {
                    continue;
                }

                if (i >= 7)
                {
                    // NOTE(review): presumably these cells hold markup from which
                    // Common.GetScrUrl extracts image URLs — confirm against Common.
                    List<string> ltScrUrlTemp = Common.GetScrUrl(tempStr);
                    foreach (string item in ltScrUrlTemp)
                    {
                        list.Add(new MatInfo { url = item });
                    }
                }
                else
                {
                    list.Add(new MatInfo { url = tempStr });
                }
            }

            temp.images = list;
            imgCount += list.Count;
            ltImgInfo.Enqueue(temp);
        }
        log.Info("解析完毕,一共[" + ltImgInfo.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
    }
    catch (Exception ex)
    {
        log.Error("解析表格异常:" + ex.Message);
        MessageBox.Show("解析表格异常:" + ex.Message);
    }
}
/// <summary>
/// Writes the progress summary (downloaded / recognised / flagged counts and the
/// elapsed time) into <c>txtTotal</c>, marshalling the update through
/// <c>txtTotal.Invoke</c> so it can be called from a worker thread.
/// </summary>
/// <param name="total">Total number of records, already formatted.</param>
/// <param name="ocrNum">Number of records recognised so far, already formatted.</param>
/// <param name="downloadCount">Number of records downloaded so far, already formatted.</param>
/// <param name="time">Elapsed time in milliseconds.</param>
/// <param name="vioIDCount">Number of records flagged so far.</param>
void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
{
    Action refresh = delegate
    {
        TimeSpan elapsed = TimeSpan.FromMilliseconds(time);
        object[] parts = { downloadCount, total, ocrNum, total, elapsed.ToString(), vioIDCount };
        txtTotal.Text = string.Format("下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}", parts);
    };
    txtTotal.Invoke(refresh);
}
/// <summary>
/// "Download and recognise" handler: after confirmation it resets the output folders
/// and counters, then starts two download workers and one OCR worker that process
/// the records queued in <c>ltImgInfo</c>.
/// </summary>
/// <remarks>
/// Each run captures its own cancellation token and hand-over queue, so workers that
/// are still finishing after Stop never join a later run.
/// </remarks>
/// <param name="sender">Source of the Click event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    if (ltImgInfo.Count == 0)
    {
        MessageBox.Show("请先选择表格!");
        return;
    }

    DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
    if (result != DialogResult.Yes)
    {
        log.Info("取消开始下载识别!");
        return;
    }
    log.Info("确认开始下载识别!");

    PrepareOutputFolders();

    btnStop.Enabled = true;
    btnStart.Enabled = false;
    chkSaveImg.Enabled = false;
    chkSaveOcr.Enabled = false;
    saveImg = chkSaveImg.Checked;
    saveOcr = chkSaveOcr.Checked;
    Application.DoEvents();

    cts = new CancellationTokenSource();
    CancellationToken token = cts.Token;
    Stopwatch total = Stopwatch.StartNew();

    // Per-run queues are passed to the workers as locals; reading the fields from the
    // worker threads would let a worker from a stopped run pick up the next run's state.
    ConcurrentQueue<ImgInfo> pending = ltImgInfo;
    ConcurrentQueue<ImgInfo> downloaded = new ConcurrentQueue<ImgInfo>();
    matQueue = downloaded;

    ocrNum = 0;
    downloadCount = 0;
    vioIDCount = 0; // was never reset, so a second run kept counting on top of the first
    totalCount = pending.Count; // number of records (not individual images)

    // Download workers.
    const int downloadThreadNum = 2;
    for (int i = 0; i < downloadThreadNum; i++)
    {
        Task.Factory.StartNew(() => DownloadLoop(pending, downloaded, token), TaskCreationOptions.LongRunning);
    }

    // Single OCR worker.
    Task.Factory.StartNew(() => RecognizeLoop(downloaded, total, token), TaskCreationOptions.LongRunning);
}

// Pause used by a worker whose input queue is momentarily empty, instead of busy-spinning.
private const int IdlePollMilliseconds = 20;

// Creates the output folders, deletes every file of the previous run under "result"
// and writes the column header of result.txt.
private static void PrepareOutputFolders()
{
    // CreateDirectory is a no-op for folders that already exist.
    Directory.CreateDirectory("img");
    Directory.CreateDirectory("ocr_result");
    Directory.CreateDirectory("result");
    Directory.CreateDirectory("result//img");

    foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
    {
        File.Delete(filePath);
    }

    // NOTE(review): "titel" looks like a typo for "title"; kept unchanged in case
    // something downstream keys on this header.
    File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");
}

// Download worker: takes records from 'pending', downloads their images and hands the
// record over to 'downloaded' until every record is done or the run is cancelled.
private void DownloadLoop(ConcurrentQueue<ImgInfo> pending, ConcurrentQueue<ImgInfo> downloaded, CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref downloadCount) == totalCount)
        {
            log.Info("--------------------------------->下载完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!pending.TryDequeue(out imgInfo))
        {
            // The other worker is still busy with the last record(s); wait without burning a core.
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        // Back-pressure: slow down while the OCR worker is lagging behind.
        if (downloaded.Count > 50)
        {
            Thread.Sleep(1000);
        }
        if (downloaded.Count > 100)
        {
            Thread.Sleep(2000);
        }

        int imagesCount = imgInfo.images.Count();
        for (int j = 0; j < imagesCount; j++)
        {
            DownloadImage(imgInfo, j);
        }

        // A newer run has replaced the hand-over queue: drop this record rather than
        // touching the new run's counters.
        if (!ReferenceEquals(matQueue, downloaded))
        {
            DisposeMats(imgInfo);
            return;
        }

        downloaded.Enqueue(imgInfo);
        Interlocked.Increment(ref downloadCount);
    }
}

// Downloads image number 'index' of the record and stores it as a Mat on the record.
// Failures are logged and leave the image's mat unset, which the OCR worker skips.
private void DownloadImage(ImgInfo imgInfo, int index)
{
    MatInfo image = imgInfo.images[index];
    try
    {
        Stopwatch sw = Stopwatch.StartNew();

        // Direct cast: a non-HTTP URL fails with a descriptive InvalidCastException
        // instead of a NullReferenceException on the next line.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(image.url);
        request.KeepAlive = false;
        request.ServicePoint.Expect100Continue = false;
        request.Timeout = 2000;          // 2 seconds
        request.ReadWriteTimeout = 2000; // 2 seconds
        request.ServicePoint.UseNagleAlgorithm = false;
        request.ServicePoint.ConnectionLimit = 65500;
        request.AllowWriteStreamBuffering = false;
        request.Proxy = null;
        request.CookieContainer = new CookieContainer();
        request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(image.url).Host });

        string name = imgInfo.id + "_" + index;

        using (HttpWebResponse wresp = (HttpWebResponse)request.GetResponse())
        using (Stream s = wresp.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // GDI+ requires the source stream to stay open for the lifetime of the image,
            // so decode from an in-memory copy that outlives the Bitmap.
            s.CopyTo(buffer);
            buffer.Position = 0;

            using (Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(buffer))
            {
                sw.Stop();

                if (saveImg)
                {
                    bmp.Save("img//" + name + ".jpg");
                }

                Mat mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
                try
                {
                    if (mat.Channels() == 4)
                    {
                        Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
                    }
                }
                catch
                {
                    mat.Dispose();
                    throw;
                }

                image.mat = mat;
                image.name = name;
            }
        }

        log.Info(" " + image.name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
    }
    catch (Exception ex)
    {
        log.Error("---->id:" + imgInfo.id + ",url[" + image.url + "],下载异常:" + ex.Message);
    }
}

// OCR worker: recognises the images of every downloaded record and checks the text
// against the rules until every record is done or the run is cancelled.
private void RecognizeLoop(ConcurrentQueue<ImgInfo> downloaded, Stopwatch total, CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref ocrNum) == totalCount)
        {
            log.Info("--------------------------------->识别完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!downloaded.TryDequeue(out imgInfo))
        {
            // Nothing downloaded yet; wait without burning a core.
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        Stopwatch perID = Stopwatch.StartNew();
        int imagesCount = imgInfo.images.Count();
        try
        {
            for (int j = 0; j < imagesCount; j++)
            {
                // The first violating image flags the whole record; the rest are skipped.
                if (RecognizeImage(imgInfo, j))
                {
                    break;
                }
            }
        }
        finally
        {
            // Also releases the images skipped after a violation, which used to leak.
            DisposeMats(imgInfo);
        }

        perID.Stop();
        log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");

        // A newer run has started: do not touch its counters or progress display.
        if (!ReferenceEquals(matQueue, downloaded))
        {
            return;
        }

        Interlocked.Increment(ref ocrNum);
        ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
    }
}

// Runs OCR on image number 'index' of the record and applies the two rule checks.
// Returns true when the image violates a rule (the violation has then been recorded);
// returns false for clean, missing or failed images.
private bool RecognizeImage(ImgInfo imgInfo, int index)
{
    MatInfo image = imgInfo.images[index];
    try
    {
        if (image.mat == null || image.mat.Empty())
        {
            return false;
        }

        Stopwatch sw = Stopwatch.StartNew();
        PaddleOcrResult ocrResult = Program.paddleOcr.Run(image.mat);
        sw.Stop();
        log.Info(" " + image.name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");

        // Keep only confident regions, ordered by their centre (Y first, then X).
        string ocrInfo = string.Join("\n", from x in ocrResult.Regions
                                           where x.Score > 0.8
                                           orderby x.Rect.Center.Y, x.Rect.Center.X
                                           select x.Text);
        if (saveOcr)
        {
            File.WriteAllText("ocr_result//" + image.name + ".txt", ocrInfo);
        }

        // Rule checks run on the text with outer whitespace and all spaces removed.
        ocrInfo = ocrInfo.Trim().Replace(" ", "");

        string words;
        if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
        {
            ReportViolation(imgInfo, image, "包含违禁词:", words, ocrInfo);
            return true;
        }
        if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
        {
            ReportViolation(imgInfo, image, "疑似包含电话号码:", words, ocrInfo);
            return true;
        }
        return false;
    }
    catch (Exception ex)
    {
        log.Info(" " + image.name + "---->识别异常:" + ex.Message);
        return false;
    }
}

// Logs a rule violation, appends it to result.txt / result_detail.txt, saves the
// offending image under result//img and bumps the violation counter.
private void ReportViolation(ImgInfo imgInfo, MatInfo image, string label, string words, string ocrInfo)
{
    string resultInfo = string.Format("ID:{0},Title:[{1}],------>{2}{3}", imgInfo.id, imgInfo.title, label, words);
    log.Info(resultInfo);

    File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t" + label + words + "\r\n");
    File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");

    Cv2.ImWrite("result//img//" + image.name + ".jpg", image.mat);
    Interlocked.Increment(ref vioIDCount);
}

// Releases the native image buffers still attached to a record.
private static void DisposeMats(ImgInfo imgInfo)
{
    foreach (MatInfo image in imgInfo.images)
    {
        if (image.mat != null)
        {
            image.mat.Dispose();
            image.mat = null;
        }
    }
}
/// <summary>
/// "Stop" handler: requests cancellation of the running download/recognition workers
/// and re-enables the start button and the two option check boxes.
/// </summary>
/// <param name="sender">Source of the Click event.</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // cts is only created by button1_Click, so it is still null if Stop is
    // triggered before any run has been started.
    if (cts != null)
    {
        cts.Cancel();
    }

    btnStop.Enabled = false;
    btnStart.Enabled = true;
    chkSaveImg.Enabled = true;
    chkSaveOcr.Enabled = true;
}
}
}
- using Aspose.Cells;
- using NLog;
- using OpenCvSharp;
- using OpenVINO.OCRService;
- using Sdcb.OpenVINO;
- using Sdcb.OpenVINO.PaddleOCR;
- using Sdcb.OpenVINO.PaddleOCR.Models;
- using System;
- using System.Collections.Concurrent;
- using System.Collections.Generic;
- using System.Data;
- using System.Diagnostics;
- using System.Drawing;
- using System.IO;
- using System.Linq;
- using System.Net;
- using System.Text;
- using System.Threading;
- using System.Threading.Tasks;
- using System.Windows.Forms;
-
- namespace OpenVINO.OCR
- {
- public partial class frmMain : Form
- {
- public frmMain()
- {
- InitializeComponent();
- NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
- }
-
- String startupPath;
- private string excelFileFilter = "表格|*.xlsx;*.xls;";
- private Logger log = NLog.LogManager.GetCurrentClassLogger();
- CancellationTokenSource cts;
-
- ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
- ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();
-
- bool saveImg = false;
- bool saveOcr = false;
-
- int ocrNum = 0;//完成OCR识别的数量
- int totalCount = 0;//图片总数量
- int downloadCount = 0;//图片下载数量
- int vioIDCount = 0;//违规ID;
-
- private void frmMain_Load(object sender, EventArgs e)
- {
- DateTime limitTime = new DateTime(2024, 08, 30, 00, 00, 00);
- //测试使用
- if (DateTime.Now > limitTime)
- {
- MessageBox.Show("此软件试用期已过");
- Application.Exit();
- }
-
- //初始化
- startupPath = System.Windows.Forms.Application.StartupPath;
-
-
- string detectionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_det_infer";
- string classificationModelDir = startupPath + "\\inference\\ch_ppocr_mobile_v2.0_cls_infer";
- string recognitionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_rec_infer";
- string labelFilePath = startupPath + "\\inference\\ppocr_keys.txt";
-
- FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);
-
- PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
- paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
- paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
- paddleOcrOptions.RecognitionStaticWidth = 512;
-
- Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
- Program.paddleOcr.Detector.UnclipRatio = 1.5f;
- Program.paddleOcr.AllowRotateDetection = true; /* 允许识别有角度的文字 */
- Program.paddleOcr.Enable180Classification = false; /* 允许识别旋转角度大于90度的文字 */
-
- ServicePointManager.Expect100Continue = false;
- ServicePointManager.DefaultConnectionLimit = 512;
-
- //加载违禁词
- Common.ltRuleContains.Clear();
- Common.ltRuleTel.Clear();
-
- string ruleContainsPath = "rules\\rule_contains.txt";
- if (File.Exists(ruleContainsPath))
- {
- Common.ltRuleContains = File.ReadAllLines(ruleContainsPath).ToList();
-
- }
- StringBuilder sb = new StringBuilder();
- foreach (var item in Common.ltRuleContains)
- {
- sb.AppendLine(item);
- }
- log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + sb.ToString());
-
-
- string ruleTelPath = "rules\\rule_tel.txt";
- if (File.Exists(ruleTelPath))
- {
- foreach (var item in File.ReadAllLines(ruleTelPath))
- {
- Common.ltRuleTel.Add(item.ToLower());
- }
- }
-
- sb.Clear();
- foreach (var item in Common.ltRuleTel)
- {
- sb.AppendLine(item);
- }
- log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + sb.ToString());
-
- }
-
- /// <summary>
- /// 选择表格
- /// </summary>
- /// <param name="sender"></param>
- /// <param name="e"></param>
- private void button2_Click(object sender, EventArgs e)
- {
- try
- {
- OpenFileDialog ofd = new OpenFileDialog();
- ofd.Filter = excelFileFilter;
- if (ofd.ShowDialog() != DialogResult.OK) return;
-
- log.Info("解析中……");
- Application.DoEvents();
-
- Stopwatch sw = new Stopwatch();
- sw.Start(); //开始计时
-
- string excelPath = ofd.FileName;
-
- Workbook workbook = new Workbook(excelPath);
- Cells cells = workbook.Worksheets[0].Cells;
- System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, cells.MaxColumn + 1);//noneTitle
-
- ltImgInfo = new ConcurrentQueue<ImgInfo>();
-
- //遍历
- ImgInfo temp;
- int imgCount = 0;
- foreach (DataRow row in dataTable1.Rows)
- {
- temp = new ImgInfo();
- temp.id = row[0].ToString();
- temp.title = row[1].ToString();
-
- List<MatInfo> list = new List<MatInfo>();
- for (int i = 2; i < cells.MaxColumn + 1; i++)
- {
-
- string tempStr = row[i].ToString();
- if (!string.IsNullOrEmpty(tempStr))
- {
- if (i >= 7)
- {
- List<string> ltScrUrlTemp = Common.GetScrUrl(tempStr);
- if (ltScrUrlTemp.Count > 0)
- {
- foreach (var item in ltScrUrlTemp)
- {
-
- MatInfo matInfo = new MatInfo();
- matInfo.url = item;
- list.Add(matInfo);
- }
- }
- }
- else
- {
- MatInfo matInfo = new MatInfo();
- matInfo.url = tempStr;
- list.Add(matInfo);
- }
- }
- }
- temp.images = list;
- imgCount = imgCount + list.Count();
- ltImgInfo.Enqueue(temp);
-
- //for test
- //if (ltImgInfo.Count()>10)
- //{
- // break;
- //}
- }
- log.Info("解析完毕,一共[" + ltImgInfo.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
- }
- catch (Exception ex)
- {
- log.Error("解析表格异常:" + ex.Message);
- MessageBox.Show("解析表格异常:" + ex.Message);
- }
- }
-
- void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
- {
- txtTotal.Invoke(new Action(() =>
- {
- TimeSpan ts = TimeSpan.FromMilliseconds(time);
- txtTotal.Text = string.Format("下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}"
- , downloadCount
- , total
- , ocrNum
- , total
- , ts.ToString()
- , vioIDCount
- );
- }));
- }
-
- /// <summary>
- /// 下载识别
- /// </summary>
- /// <param name="sender"></param>
- /// <param name="e"></param>
- private void button1_Click(object sender, EventArgs e)
- {
- if (ltImgInfo.Count == 0)
- {
- MessageBox.Show("请先选择表格!");
- return;
- }
-
- DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
- if (result == DialogResult.Yes)
- {
- log.Info("确认开始下载识别!");
- }
- else
- {
- log.Info("取消开始下载识别!");
- return;
- }
-
- if (!Directory.Exists("img"))
- {
- Directory.CreateDirectory("img");
- }
-
- if (!Directory.Exists("ocr_result"))
- {
- Directory.CreateDirectory("ocr_result");
- }
-
- if (!Directory.Exists("result"))
- {
- Directory.CreateDirectory("result");
- }
-
- if (!Directory.Exists("result//img"))
- {
- Directory.CreateDirectory("result//img");
- }
-
- //清空结果
- File.WriteAllText("result//result.txt", "");
- File.WriteAllText("result//result_detail.txt", "");
- // 清空文件夹中的文件
- foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
- {
- File.Delete(filePath);
- }
-
- // 写入列标题
- File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");
-
- btnStop.Enabled = true;
- btnStart.Enabled = false;
- chkSaveImg.Enabled = false;
- chkSaveOcr.Enabled = false;
-
- if (chkSaveImg.Checked)
- {
- saveImg = true;
- }
- else
- {
- saveImg = false;
- }
-
- if (chkSaveOcr.Checked)
- {
- saveOcr = true;
- }
- else
- {
- saveOcr = false;
- }
-
- Application.DoEvents();
-
- cts = new CancellationTokenSource();
-
- Stopwatch total = new Stopwatch();
- total.Start(); //开始计时
-
- // 清空队列
- matQueue = new ConcurrentQueue<ImgInfo>();
- //while (!matQueue.IsEmpty)
- //{
- // matQueue.TryDequeue(out _);
- //}
-
-
- ocrNum = 0;//完成OCR识别的数量
- totalCount = ltImgInfo.Count();//图片总数量
- downloadCount = 0;
-
- //下载线程
- int downloadThreadNum = 2;
- for (int i = 0; i < downloadThreadNum; i++)
- {
- Task.Factory.StartNew(() =>
- {
- while (true)
- {
- //判断是否被取消;
- if (cts.Token.IsCancellationRequested)
- {
- return;
- }
-
- if (downloadCount == totalCount)
- {
- log.Info("--------------------------------->下载完成!<----------------------------------");
- return;
- }
-
- ImgInfo imgInfo = new ImgInfo();
- if (ltImgInfo.TryDequeue(out imgInfo))
- {
- //队列容量大于50 休息一秒
- if (matQueue.Count > 50)
- {
- System.Threading.Thread.Sleep(1000);
- }
-
- if (matQueue.Count > 100)
- {
- System.Threading.Thread.Sleep(2000);
- }
-
- int imagesCount = imgInfo.images.Count();
- for (int j = 0; j < imagesCount; j++)
- {
- try
- {
- Stopwatch sw = new Stopwatch();
- sw.Start(); //开始计时
- HttpWebRequest request = WebRequest.Create(imgInfo.images[j].url) as HttpWebRequest;
- request.KeepAlive = false;
- request.ServicePoint.Expect100Continue = false;
- request.Timeout = 2000;// 2秒
- request.ReadWriteTimeout = 2000;//2秒
-
- request.ServicePoint.UseNagleAlgorithm = false;
- request.ServicePoint.ConnectionLimit = 65500;
- request.AllowWriteStreamBuffering = false;
- request.Proxy = null;
-
- request.CookieContainer = new CookieContainer();
- request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(imgInfo.images[j].url).Host });
-
- HttpWebResponse wresp = (HttpWebResponse)request.GetResponse();
- Stream s = wresp.GetResponseStream();
- Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(s);
- s.Dispose();
- wresp.Close();
- wresp.Dispose();
- request.Abort();
-
- sw.Stop();
-
-
- if (saveImg)
- {
- bmp.Save("img//" + imgInfo.id + "_" + j + ".jpg");
- }
-
- var mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
-
- if (mat.Channels() == 4)
- {
- Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
- }
-
- imgInfo.images[j].mat = mat;
- imgInfo.images[j].name = imgInfo.id + "_" + j;
-
- if (saveImg)
- {
- bmp.Save("img//" + imgInfo.images[j].name + ".jpg");
- }
-
- log.Info(" " + imgInfo.images[j].name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
- }
- catch (Exception ex)
- {
- log.Error("---->id:" + imgInfo.id + ",url[" + imgInfo.images[j].url + "],下载异常:" + ex.Message);
- }
- }
- matQueue.Enqueue(imgInfo);
- Interlocked.Increment(ref downloadCount);
- }
-
- }
- }, TaskCreationOptions.LongRunning);
-
- }
-
- //识别线程
- Task.Factory.StartNew(() =>
- {
- while (true)
- {
- //判断是否被取消;
- if (cts.Token.IsCancellationRequested)
- {
- return;
- }
-
- if (ocrNum == totalCount)
- {
- log.Info("--------------------------------->识别完成!<----------------------------------");
- return;
- }
-
- ImgInfo imgInfo = new ImgInfo();
- if (matQueue.TryDequeue(out imgInfo))
- {
-
- Stopwatch perID = new Stopwatch();
- perID.Start();//开始计时
- int imagesCount = imgInfo.images.Count();
- for (int j = 0; j < imagesCount; j++)
- {
- //Mat mat= imgInfo.images[j].mat;
- Stopwatch sw = new Stopwatch();
- sw.Start(); //开始计时
- PaddleOcrResult ocrResult = null;
- try
- {
- if (imgInfo.images[j].mat != null && (!imgInfo.images[j].mat.Empty()))
- {
- ocrResult = Program.paddleOcr.Run(imgInfo.images[j].mat);
-
- sw.Stop();
- log.Info(" " + imgInfo.images[j].name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");
-
- //string ocrInfo = ocrResult.Text.ToString();
-
- string ocrInfo = string.Join("\n", from x in ocrResult.Regions
- where x.Score > 0.8
- orderby x.Rect.Center.Y, x.Rect.Center.X
- select x.Text);
-
- if (saveOcr)
- {
- File.WriteAllText("ocr_result//" + imgInfo.images[j].name + ".txt", ocrInfo);
- }
-
- //规则校验
- Stopwatch ruleSw = new Stopwatch();
- ruleSw.Start();//开始计时
- ocrInfo = ocrInfo.Trim();
- ocrInfo = ocrInfo.Replace(" ", "");
-
- string words = "";
- string resultInfo = "";
- if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
- {
- resultInfo = string.Format("ID:{0},Title:[{1}],------>包含违禁词:{2}", imgInfo.id, imgInfo.title, words);
- log.Info(resultInfo);
-
- //存数据
- File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t包含违禁词:" + words + "\r\n");
- File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");
-
- //存图
- Cv2.ImWrite("result//img//" + imgInfo.images[j].name + ".jpg", imgInfo.images[j].mat);
- imgInfo.images[j].mat.Dispose();
-
- Interlocked.Increment(ref vioIDCount);
-
- break;
- }
-
- if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
- {
- resultInfo = string.Format("ID:{0},Title:[{1}],------>疑似包含电话号码:{2}", imgInfo.id, imgInfo.title, words);
- log.Info(resultInfo);
- //File.AppendAllText("result//result.txt", resultInfo+ "\r\n");
- File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t疑似包含电话号码:" + words + "\r\n");
- File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");
-
- //存图
- Cv2.ImWrite("result//img//" + imgInfo.images[j].name + ".jpg", imgInfo.images[j].mat);
- imgInfo.images[j].mat.Dispose();
-
- Interlocked.Increment(ref vioIDCount);
-
- break;
- }
- imgInfo.images[j].mat.Dispose();
- ruleSw.Stop();
- //log.Info(" " + imgInfo.images[j].name + "---->违禁词校验用时:" + ruleSw.ElapsedMilliseconds + "毫秒");
- }
- }
- catch (Exception ex)
- {
- imgInfo.images[j].mat.Dispose();
- log.Info(" " + imgInfo.images[j].name + "---->识别异常:" + ex.Message);
- }
- }
-
- perID.Stop();
- log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");
- Interlocked.Increment(ref ocrNum);
- ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
-
- }
- }
- }, TaskCreationOptions.LongRunning);
- }
-
- /// <summary>
- /// 停止
- /// </summary>
- /// <param name="sender"></param>
- /// <param name="e"></param>
- private void button3_Click(object sender, EventArgs e)
- {
- cts.Cancel();
- btnStop.Enabled = false;
- btnStart.Enabled = true;
-
- chkSaveImg.Enabled = true;
- chkSaveOcr.Enabled = true;
- }
- }
- }
下载


评论记录:
回复评论: