讲道理90%的工作都是在做图像处理。。。。opencv各种操作。
一完工赶紧专心复习emmmm
大致步骤:
首先必要的库
import cv2 import numpy as np import matplotlib.pyplot as plt import pytesseract from PIL import Image import os from PyQt5 import QtCore, QtGui, QtWidgets, Qt from PyQt5.QtCore import * from PyQt5.QtWidgets import *
① 预处理(灰度,直方图均衡,滤波)
def stechCr(img):
    """Return a histogram-equalised copy of ``img``.

    When the module-level ``debug`` flag is truthy the result is shown in a
    window titled ``'dst'`` and the call blocks until a key is pressed.

    Args:
        img: image handed straight to ``cv2.equalizeHist``.

    Returns:
        The equalised image produced by ``cv2.equalizeHist``.
    """
    equalized = cv2.equalizeHist(img)
    if debug:
        cv2.imshow('dst', equalized)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return equalized


# Pre-processing pipeline: denoise the colour image, convert it to grayscale,
# then let makepic() cut out the card region.
# NOTE(review): these statements read `img` from the enclosing scope and look
# like an excerpt of the detection routine - confirm where they belong.
gray = cv2.fastNlMeansDenoisingColored(img, None, 10, 3, 3, 3)
gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
img, gray = makepic(gray)
② 提取ROI,主要用opencv的findContours找到背景下最大的轮廓,也就是卡面本身。
def makepic(Img):
    """Crop the card face (largest contour) out of a grayscale image.

    The image is histogram-equalised with ``stechCr``, binarised at the
    module-level threshold ``lot``, and the contour with the largest area is
    taken as the region of interest. A debug picture with all contours drawn
    is written to ``contours.png`` in the current working directory.

    Args:
        Img: single-channel image; it is passed to ``cv2.equalizeHist`` via
            ``stechCr``.

    Returns:
        A tuple ``(r1, r2)``: the equalised grayscale ROI and the binarised
        ROI, both resized to 1120x680 with cubic interpolation.

    Raises:
        ValueError: if no contour is found in the binarised image.
    """
    Img = stechCr(Img)
    _, img = cv2.threshold(Img, lot, 255, cv2.THRESH_BINARY)

    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4 returns
    # (contours, hierarchy); the contour list is second-to-last in both.
    contours = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError("makepic: no contours found in the binarised image")

    # Draw on a copy: drawing on Img itself would burn the contour lines into
    # the pixels that are cropped and returned below.
    tmpImg = Img.copy()
    cv2.drawContours(tmpImg, contours, -1, (0, 0, 255), 3)
    cv2.imwrite('contours.png', tmpImg)

    # max() keeps the first contour among equals, like a strict "<" scan.
    res = max(contours, key=cv2.contourArea)

    rect = cv2.minAreaRect(res)
    # np.int0 was removed in NumPy 2.0; it was an alias of np.intp.
    box = cv2.boxPoints(rect).astype(np.intp)

    # Axis-aligned bounding box of the (possibly rotated) rectangle. Taking
    # min/max over all four corners avoids under-cropping a rotated card, and
    # clamping at 0 stops negative corners from wrapping around when slicing.
    xs, ys = box[:, 0], box[:, 1]
    x1, y1 = max(int(xs.min()), 0), max(int(ys.min()), 0)
    x2, y2 = int(xs.max()), int(ys.max())

    roi_gray = Img[y1:y2, x1:x2]
    roi_binary = img[y1:y2, x1:x2]
    target_size = (560 * 2, 340 * 2)
    r1 = cv2.resize(roi_gray, target_size, interpolation=cv2.INTER_CUBIC)
    r2 = cv2.resize(roi_binary, target_size, interpolation=cv2.INTER_CUBIC)

    if debug:
        cv2.imshow('ROI', roi_gray)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return r1, r2
③ (这里还应该做一个镜头角度矫正,但是我还没搞明白那个东西,所以就没做了)
④ ROI反向二值化,然后膨胀。膨胀后的效果如下图(上面的过程比较简单而且泄露个人信息就不放了。。。。)
def prepare(gray):
    """Inverse-binarise the ROI and dilate the result.

    The image is thresholded with ``cv2.THRESH_BINARY_INV`` at the
    module-level threshold ``lot`` and then dilated once with a 17x17
    rectangular kernel. Both stages are written to ``BI_img.png`` and
    ``AfterDilate.png``; when ``debug`` is truthy each one is also displayed
    and the call blocks until a key is pressed.

    Args:
        gray: image to threshold.

    Returns:
        The dilated binary image.
    """
    # Inverse binarisation.
    _, inverted = cv2.threshold(gray, lot, 255, cv2.THRESH_BINARY_INV)

    # Dilation.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 17))
    dilated = cv2.dilate(inverted, kernel, iterations=1)

    stages = (("BI_img", inverted), ("AfterDilate", dilated))
    for title, picture in stages:
        cv2.imwrite(title + ".png", picture)

    if debug:
        for title, picture in stages:
            cv2.imshow(title, picture)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
    return dilated
⑤ 找出文本区域:再用一次findContours,找出来各个白色块的轮廓,然后用cv2.minAreaRect()找到最小包围矩形,并且把不符合要求的矩形都剔除掉(太长太宽太大太小之类的,参数需要调),最后把找到的区域切割出去。
def findTextRegion(img, min_area=4 * 1500, max_area=2 * 15000):
    """Find candidate text boxes in a dilated binary image.

    Every contour is reduced to its minimum-area rectangle; rectangles whose
    contour area or aspect ratio falls outside the accepted range are
    discarded.

    Args:
        img: binary image passed to ``cv2.findContours``.
        min_area: contours with a smaller area are dropped. The default was
            tuned by the author for the campus card.
        max_area: contours with a larger area are dropped.

    Returns:
        A list of 4x2 integer arrays, one per accepted rectangle, holding the
        corner coordinates returned by ``cv2.boxPoints``.
    """
    region = []

    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4 returns
    # (contours, hierarchy); the contour list is second-to-last in both.
    contours = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for cnt in contours:
        area = cv2.contourArea(cnt)
        # Drop blobs that are too small or too large.
        if area < min_area or area > max_area:
            continue

        # Minimum-area rectangle; it may be rotated.
        rect = cv2.minAreaRect(cnt)
        # Four corner points. np.int0 was removed in NumPy 2.0; it was an
        # alias of np.intp.
        box = cv2.boxPoints(rect).astype(np.intp)

        height = abs(box[0][1] - box[2][1])
        width = abs(box[0][0] - box[2][0])

        # Keep only boxes that are neither clearly taller than wide nor
        # extremely elongated.
        if height > width * 1.2:
            continue
        if height * 15 < width:
            continue
        region.append(box)
    return region
方框图出来的结果
切割出来的各个小块
⑥ 最后就是把切割出来的图片送给pytesseract做OCR输出就可以了,我这里是存在了infoDict的字典里。
def ocrIdCard(imgPath):
    """Run OCR on a card photo and return the recognised fields.

    The image is loaded, split into text snippets by ``detect`` and each
    snippet is recognised with Tesseract (``chi_sim``). Results containing
    ASCII letters are discarded; the rest are classified by length or suffix.

    Args:
        imgPath: path of the image file.

    Returns:
        A dict with the keys ``"学号"``, ``"姓名"``, ``"学院"`` and ``"班级"``;
        fields that were not recognised stay empty strings.

    Raises:
        ValueError: if the image cannot be read.
    """
    # NOTE(review): the working directory is switched to the image's folder
    # and the file is opened by bare name, as in the original code -
    # presumably a workaround for cv2.imread path handling; confirm it is
    # still needed. An empty directory part is skipped because os.chdir("")
    # raises.
    directory, filename = os.path.split(imgPath)
    if directory:
        os.chdir(directory)
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("cannot read image: %r" % (imgPath,))

    infoDict = {"学号": "",
                "姓名": "",
                "学院": "",
                "班级": ""}
    print('start...\n')
    idImgs = detect(img)

    tessdata_dir_config = '--tessdata-dir "C:\\Program Files (x86)\\Tesseract-OCR\\tessdata"'
    tesseract_cmd = "C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
    # The executable path was defined but never handed to pytesseract; apply
    # it only when that installation exists so other setups keep using PATH.
    if os.path.isfile(tesseract_cmd):
        pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

    for i, idImg in enumerate(idImgs, start=1):
        if debug:
            cv2.imshow('temp' + str(i), idImg)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
        image = Image.fromarray(idImg)
        result = pytesseract.image_to_string(image, lang='chi_sim',
                                             config=tessdata_dir_config)
        # Strip the trailing newline / form feed Tesseract may append so the
        # length-based classification below is not thrown off.
        result = result.strip().replace(' ', '')
        if not result:
            continue
        # Any ASCII letter marks the snippet as a misread; skip it.
        if any('a' <= ch <= 'z' or 'A' <= ch <= 'Z' for ch in result):
            continue

        if len(result) == 11:
            infoDict["学号"] = result
        elif len(result) == 7:
            infoDict["班级"] = result
        elif result[-2:] == "学院":
            infoDict["学院"] = result
        else:
            infoDict["姓名"] = result
    return infoDict
⑦ 图形化界面,话说从C#过来的我用python写图形化真的是难受的一批。。PyQt真的相比C#太弟弟了。。
class oneCardOcrUi(QtWidgets.QWidget):
    """Window that runs OCR on a card photo and shows the recognised fields.

    The form has four editable fields (name, student id, college, class), a
    button that opens an image and runs ``ocrIdCard`` on it, and a button
    that saves the last result as a CSV file.
    """

    def __init__(self):
        super(oneCardOcrUi, self).__init__()
        # Last OCR result; initialised before the UI is built so the slots
        # can always rely on the attribute existing.
        self.infoCard = {}
        self.setupUi()

    def setupUi(self):
        """Create the widgets, wire the buttons and show the window."""
        self.setWindowTitle("一卡通识别")
        self.layout = QtWidgets.QGridLayout()
        layout = self.layout
        self.setGeometry(600, 600, 400, 400)

        self.nameLabel = QtWidgets.QLabel("姓名")
        self.nameLineEdit = QtWidgets.QLineEdit("")
        self.idLabel = QtWidgets.QLabel("学号")
        self.idLineEdit = QtWidgets.QLineEdit("")
        self.scLabel = QtWidgets.QLabel("学院")
        self.scLineEdit = QtWidgets.QLineEdit("")
        self.clLabel = QtWidgets.QLabel("班级")
        self.clLineEdit = QtWidgets.QLineEdit("")

        rows = (
            (self.nameLabel, self.nameLineEdit),
            (self.idLabel, self.idLineEdit),
            (self.scLabel, self.scLineEdit),
            (self.clLabel, self.clLineEdit),
        )
        for row, (label, line_edit) in enumerate(rows, start=1):
            layout.addWidget(label, row, 0)
            layout.addWidget(line_edit, row, 1)
        layout.setColumnStretch(1, 10)

        open_Btn = QtWidgets.QPushButton('打开图片')
        save_Btn = QtWidgets.QPushButton('保存为csv')
        save_Btn.clicked.connect(self.savefile)
        open_Btn.clicked.connect(self.openfile)
        layout.addWidget(open_Btn)
        layout.addWidget(save_Btn)

        self.setLayout(layout)
        self.show()

    def savefile(self):
        """Ask for a target file and write the last OCR result to it as CSV."""
        savefile_name = QtWidgets.QFileDialog.getSaveFileName(
            self, '选择存储方式', '', 'csv文件(*.csv )')
        if debug:
            print(savefile_name)
            print(self.infoCard)
        # The dialog returns a (path, selected_filter) tuple, never None; a
        # cancelled dialog yields an empty path.
        if not savefile_name or not savefile_name[0]:
            return
        # Imported here because the file's import block does not provide it.
        import pandas as pd
        dataframe = pd.DataFrame(self.infoCard, index=[0])
        # utf_8_sig writes a BOM so the Chinese headers are detected as UTF-8.
        dataframe.to_csv(savefile_name[0], index=True, sep=',',
                         encoding="utf_8_sig")

    def openfile(self):
        """Ask for an image, run OCR on it and fill the form with the result."""
        # Patterns inside the parentheses are separated by spaces, not commas.
        openfile_name = QtWidgets.QFileDialog.getOpenFileName(
            self, '选择文件', '', '图片文件(*.jpeg *.png *.jpg)')
        # A cancelled dialog yields an empty path rather than None.
        if not openfile_name or not openfile_name[0]:
            return
        self.infoCard = ocrIdCard(openfile_name[0])
        self.nameLineEdit.setText(self.infoCard["姓名"])
        self.idLineEdit.setText(self.infoCard["学号"])
        self.scLineEdit.setText(self.infoCard["学院"])
        self.clLineEdit.setText(self.infoCard["班级"])
        if debug:
            print(self.infoCard)
            print(self.nameLineEdit.text())


if __name__ == "__main__":
    import sys

    app = QtWidgets.QApplication(sys.argv)
    # The constructor already calls setupUi(), which also shows the window.
    # Do not call ui.setupUi() / ui.show() again here: the author found that
    # doing so stopped the text boxes from refreshing.
    ui = oneCardOcrUi()
    sys.exit(app.exec_())
最后是几个过程中遇到的bug和解决方法
- Cannot find existing PyQt5 plugin directories 解决:https://blog.csdn.net/poppyque/article/details/85256241
- UnicodeDecodeError: ‘utf-8’ codec can’t decode byte 0xce in position 130: invalid continuation byte 解决:https://blog.csdn.net/qq_41185868/article/details/80599390
- 打包完成后双击exe出现的bug:This application failed to start because it could not find or load the Qt platform plugin “windows” 解决:https://blog.csdn.net/Perfect_Accepted/article/details/80140362
主要参考:
https://blog.csdn.net/missyougoon/article/details/81632166
https://www.jianshu.com/p/f57165f34839
https://docs.opencv.org/3.3.0/d3/dc0/group__imgproc__shape.html#ga17ed9f5d79ae97bd4c7cf18403e1689a
https://www.jianshu.com/p/98e8218b2309
https://www.cnblogs.com/ansang/p/7895075.html
完整的代码在github