爱看下雨的大黄狗 发布的帖子
萌新一枚。我在编写一个脚本的时候,想通过 OCR 识别文本来定位某些读不出来的控件,但是定位出来的位置一直是错误的,困扰了我很久。
后来发现貌似是因为我跑脚本的设备是一台 2K 屏幕的平板。我找了一个知乎上面的脚本,分别在一台 1080p 的手机和一台 2K 的平板上面跑了一下,结果如下图(手机 1080×2230,平板 2560×1600):
在手机上面的ocr输出的文字的位置都是正确的,但是2K屏幕输出的文字位置都明显错位了。(实测夜间模式开启与否不影响结果)
我简单推理了一下:大概率是 gmlkit.ocr 或 paddle.ocr 对输入图片的尺寸有要求,如果图片大于某个尺寸,应该会将图片分块进行处理,导致输出的每一个识别结果的 bounds 并不是相对于输入原图的坐标。我尝试过将截屏二值化,但是对 2K 屏幕的截屏识别没有效果。
有没有哪位大佬懂具体是什么情况?(原谅我在文档中实在是没有找到。)另外,对这种超过 OCR 模块像素要求的图片,有没有什么好的处理方法?
附测试用的知乎上的代码:
// Look up this script's engine, every running engine, and this script's source as a string.
let currentEngine = engines.myEngine()
let runningEngines = engines.all()
let currentSource = currentEngine.getSource() + ''
// With a single running engine there is nothing else to compare against.
if (runningEngines.length > 1) {
  runningEngines.forEach(function (otherEngine) {
    let otherSource = otherEngine.getSource() + ''
    let isAnotherEngine = currentEngine.id !== otherEngine.id
    if (isAnotherEngine && otherSource === currentSource) {
      // Force-stop the other engine that reports the same source as this script.
      otherEngine.forceStop()
    }
  })
}
// Ask for screenshot permission; report the failure and exit when it is not granted.
let hasCapturePermission = requestScreenCapture()
if (!hasCapturePermission) {
  toastLog('请求截图权限失败')
  exit()
}
// Wait one second before continuing.
sleep(1000)

// State shared by the capture routine, the draw callback and the exit handler.
let result = []       // latest OCR entries
let img = null        // latest screenshot
let running = true    // set to false by the exit handler
let capturing = true  // true while a capture/OCR pass is in progress
/**
 * Takes a screenshot and runs OCR on it, publishing the outcome through the
 * script-level `img`, `result` and `capturing` variables.
 *
 * `capturing` is true while the work is in flight and is always reset to
 * false on the way out, including when the capture fails or OCR throws, so
 * the draw callback is never left permanently paused. When no screenshot
 * could be taken, `result` is cleared and OCR is skipped instead of being
 * handed a missing image. Errors thrown by the OCR call still propagate to
 * the caller.
 */
function captureAndOcr() {
  capturing = true
  try {
    // Release the previous screenshot before replacing it.
    img && img.recycle()
    img = captureScreen()
    if (!img) {
      toastLog('截图失败')
      // No image means there is nothing valid to draw.
      result = []
      return
    }
    let start = new Date()
    // Convert the OCR result to an array; 3 is the level argument.
    result = gmlkit.ocr(img, "zh").toArray(3)
    log(result)
    toastLog('耗时' + (new Date() - start) + 'ms')
  } finally {
    capturing = false
  }
}
captureAndOcr()
// 获取状态栏高度
let offset = -getStatusBarHeightCompat()
//let offset = 0;
// 绘制识别结果
let window = floaty.rawWindow(
<canvas id="canvas" layout_weight="1" />
);
// 设置悬浮窗位置
ui.post(() => {
window.setPosition(0, offset)
window.setSize(device.width, device.height)
window.setTouchable(false)
})
// 操作按钮
let clickButtonWindow = floaty.rawWindow(
<vertical>
<button id="captureAndOcr" text="截图识别" />
<button id="closeBtn" text="退出" />
</vertical>
);
ui.run(function () {
clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
})
// "Capture" button: clear the old boxes, move the panel away, run a new
// capture/OCR pass on a worker thread, then bring the panel back.
clickButtonWindow.captureAndOcr.click(function () {
  result = []
  // Move the panel to (device.width, device.height), presumably so that it is
  // outside the visible area and does not end up in the screenshot.
  ui.run(function () {
    clickButtonWindow.setPosition(device.width, device.height)
  })
  // Wait 500 ms before capturing, then do the work off the click callback.
  setTimeout(() => {
    threads.start(() => {
      try {
        captureAndOcr()
      } finally {
        // Always restore the panel, even when capture/OCR throws; otherwise
        // the buttons would stay off screen with no way to retry or exit.
        ui.run(function () {
          clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
        })
      }
    })
  }, 500)
})
// "Exit" button: stop the script.
clickButtonWindow.closeBtn.click(function () {
  exit()
})
// Shared Paint instance used for every box and caption drawn on the overlay.
let Typeface = android.graphics.Typeface
let paint = new Paint()
// Rendering quality flags.
paint.setAntiAlias(true)
paint.setDither(true)
// Stroke settings.
paint.setStrokeWidth(1)
paint.setStrokeJoin(Paint.Join.ROUND)
// Text settings.
paint.setTypeface(Typeface.DEFAULT_BOLD)
paint.setTextAlign(Paint.Align.LEFT)
// Draw callback: repaint every OCR entry as a box with a caption.
window.canvas.on('draw', function (canvas) {
  // Draw only while the script is running and no capture is in progress.
  if (running && !capturing) {
    // Clear whatever was drawn on the previous frame.
    canvas.drawColor(0xFFFFFF, android.graphics.PorterDuff.Mode.CLEAR)
    if (!result) {
      return
    }
    for (let idx = 0; idx < result.length; idx++) {
      let entry = result[idx]
      // Caption: recognised text followed by the confidence with two decimals.
      let caption = entry.text + ' #信心:' + entry.confidence.toFixed(2)
      drawRectAndText(caption, entry.bounds, '#00ff00', canvas, paint);
    }
  }
})
// No-op timer firing every 10 seconds; presumably it keeps the script from finishing.
setInterval(function () {}, 10000)
// Cleanup when the script exits.
events.on('exit', function () {
  // Flag the stop first so the draw callback bails out (avoids a canvas-related crash).
  running = false
  // Release the last screenshot, if any.
  if (img) {
    img.recycle()
  }
  // Detach the canvas listeners.
  window.canvas.removeAllListeners()
})
/**
 * Outlines a rectangle and writes a caption starting at its top-left corner.
 *
 * The outline uses the inverse of the given colour and the caption uses the
 * colour itself. Before returning, the paint is left with text size 10,
 * stroke width 1 and opaque black.
 *
 * @param {*} desc caption text
 * @param {*} rect rectangle to outline; its `left` and `top` position the caption
 * @param {*} colorStr colour string handed to `colors.parseColor`
 * @param {*} canvas canvas to draw on
 * @param {*} paint paint object, reconfigured by this function
 */
function drawRectAndText (desc, rect, colorStr, canvas, paint) {
  let argb = colors.parseColor(colorStr)
  let red = argb >> 16 & 0xff
  let green = argb >> 8 & 0xff
  let blue = argb & 0xff

  // Outline in the inverse colour.
  paint.setStrokeWidth(1)
  paint.setStyle(Paint.Style.STROKE)
  paint.setARGB(255, 255 - red, 255 - green, 255 - blue)
  canvas.drawRect(rect, paint)

  // Caption in the requested colour.
  paint.setARGB(255, red, green, blue)
  paint.setStrokeWidth(1)
  paint.setTextSize(20)
  paint.setStyle(Paint.Style.FILL)
  canvas.drawText(desc, rect.left, rect.top, paint)

  // Leave the paint at small black text.
  paint.setTextSize(10)
  paint.setStrokeWidth(1)
  paint.setARGB(255, 0, 0, 0)
}
/**
 * Returns the status bar height in pixels.
 *
 * Reads the platform "status_bar_height" dimension. When that resource is
 * missing or yields a non-positive value, falls back to 25dp converted to
 * pixels with the display density.
 *
 * @returns {number} status bar height in pixels
 */
function getStatusBarHeightCompat () {
  let resources = context.getResources()
  let height = 0
  let resId = resources.getIdentifier("status_bar_height", "dimen", "android")
  if (resId > 0) {
    height = resources.getDimensionPixelOffset(resId)
  }
  if (height <= 0) {
    // Fallback of 25dp. The previous `R.dimen.dimen_25dp` lookup relied on an
    // `R` object that this script never defines, so the value is computed from
    // the display density instead (truncated, like a pixel offset).
    height = Math.floor(25 * resources.getDisplayMetrics().density)
  }
  return height
}