萌新一枚。我在编写一个脚本的时候,想通过OCR识别文本来定位某些读不出来的控件,但是定位出来的位置一直是错误的,困扰了我很久。
后来发现貌似是因为我跑脚本的设备(平板)是2K屏幕。我尝试了知乎上的一个脚本,分别在一台1080p的手机和一台2K的平板上跑了一下,结果如下图(手机1080X2230,平板2560X1600):
Screenshot_2024-12-16-12-57-13-738_com.android.settings.jpg
IMG_20241216_131516.jpg
在手机上面的ocr输出的文字的位置都是正确的,但是2K屏幕输出的文字位置都明显错位了。(实测夜间模式开启与否不影响结果)
我简单推理了一下,大概率是gmlkit.ocr或paddle.ocr对输入图片的尺寸有要求:如果大于某个尺寸,应该会将图片分块进行处理,因此输出的每一个识别结果的bounds并不是相对于输入原图的坐标。我也尝试过将截屏二值化,但是对2K屏幕的截屏识别没有效果。
有没有哪位大佬懂具体是什么情况?(原谅我在文档中实在是没有找到),还有对这种超过ocr模块像素要求的图片有没有什么好的处理方法。
附测试用的知乎上的代码:

// Make sure only one instance of this script is alive: every other running
// engine whose source text equals ours is force-stopped.
let currentEngine = engines.myEngine()
let runningEngines = engines.all()
let currentSource = currentEngine.getSource() + ''
if (runningEngines.length > 1) {
  for (let idx = 0; idx < runningEngines.length; idx++) {
    let candidate = runningEngines[idx]
    let candidateSource = candidate.getSource() + ''
    let isAnotherEngine = currentEngine.id !== candidate.id
    if (isAnotherEngine && candidateSource === currentSource) {
      // Same script, different engine: force it to stop
      candidate.forceStop()
    }
  }
}

// Ask for screen-capture permission first; when it is refused, tell the user and stop the script.
let captureGranted = requestScreenCapture()
if (!captureGranted) {
  toastLog('请求截图权限失败')
  exit()
}

// Pause for one second before the first capture.
sleep(1000)

// Script-level state shared by the capture routine, the draw callback and the exit handler.
// Latest OCR entries
let result = []
// Latest screenshot
let img = null
// Cleared by the exit handler so the draw callback stops drawing
let running = true
// True while a capture + OCR pass is in progress; the draw callback skips frames meanwhile
let capturing = true

/**
 * Take a screenshot and run OCR on it.
 *
 * Works on the script-level state: recycles the previous `img` and replaces it
 * with the new screenshot, stores the recognised entries in `result`, and keeps
 * `capturing` set to true while working (the draw callback skips frames while
 * that flag is set).
 *
 * If no screenshot could be obtained, a toast is shown, `result` is cleared and
 * OCR is skipped instead of being called with an empty image. `capturing` is
 * always reset to false on the way out, including when OCR throws, so the draw
 * callback is never left blocked.
 */
function captureAndOcr() {
  capturing = true
  try {
    img && img.recycle()
    img = captureScreen()
    if (!img) {
      toastLog('截图失败')
      // Nothing to recognise: drop stale entries and bail out before touching the OCR engine
      result = []
      return
    }
    let start = new Date()
    // Convert the OCR result to an array; level: 3
    result = gmlkit.ocr(img, "zh").toArray(3);
    log(result);
    toastLog('耗时' + (new Date() - start) + 'ms')
  } finally {
    capturing = false
  }
}

// Run one capture + OCR pass right away so there is a result to draw at startup.
captureAndOcr()

// Vertical offset applied to the overlay: the negated status bar height.
// NOTE(review): presumably this shifts the overlay up so that canvas coordinates
// line up with full-screen screenshot coordinates — confirm on the target device.
let offset = -getStatusBarHeightCompat()
//let offset = 0;

// Overlay window holding the canvas on which the OCR results are drawn
let window = floaty.rawWindow(
  <canvas id="canvas" layout_weight="1" />
);

// Position and size the overlay, and disable touch handling on it.
// NOTE(review): the overlay is sized from device.width / device.height. If those
// values do not match the size or orientation of the screenshot handed to OCR
// (e.g. on a landscape tablet), the drawn boxes will not line up with the
// screen content — verify on the affected device.
ui.post(() => {
  window.setPosition(0, offset)
  window.setSize(device.width, device.height)
  window.setTouchable(false)
})

// 操作按钮
let clickButtonWindow = floaty.rawWindow(
  <vertical>
    <button id="captureAndOcr" text="截图识别" />
    <button id="closeBtn" text="退出" />
  </vertical>
);
ui.run(function () {
  clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
})

// 点击识别
clickButtonWindow.captureAndOcr.click(function () {
  result = []
  ui.run(function () {
    clickButtonWindow.setPosition(device.width, device.height)
  })
  setTimeout(() => {
    threads.start(()=>{
      captureAndOcr()
      ui.run(function () {
        clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
      })
    })
  }, 500)
})

// 点击关闭
clickButtonWindow.closeBtn.click(function () {
  exit()
})

// Paint shared by every box and caption drawn on the overlay
let Typeface = android.graphics.Typeface
let paint = new Paint()
paint.setStrokeWidth(1)
paint.setTypeface(Typeface.DEFAULT_BOLD)
paint.setTextAlign(Paint.Align.LEFT)
paint.setAntiAlias(true)
paint.setStrokeJoin(Paint.Join.ROUND)
paint.setDither(true)

// Draw callback of the overlay canvas: wipes the canvas and draws one box plus
// caption (text and confidence) per OCR entry.
window.canvas.on('draw', function (canvas) {
  // Nothing is drawn while a capture is in progress or once the script is shutting down
  if (capturing || !running) {
    return
  }
  // Clear whatever was drawn before
  canvas.drawColor(0xFFFFFF, android.graphics.PorterDuff.Mode.CLEAR)
  if (!result || !(result.length > 0)) {
    return
  }
  for (let idx = 0; idx < result.length; idx++) {
    let entry = result[idx]
    let caption = entry.text + ' #信心:' + entry.confidence.toFixed(2)
    drawRectAndText(caption, entry.bounds, '#00ff00', canvas, paint);
  }
})

// No-op timer firing every 10 s; presumably it only keeps the script engine alive.
setInterval(function () { }, 10000)

// Cleanup when the script exits
events.on('exit', function () {
  // Flag shutdown first so the draw callback bails out (the original author
  // notes this avoids a canvas-related crash)
  running = false
  // Release the last screenshot
  if (img) {
    img.recycle()
  }
  // Detach the canvas listeners
  window.canvas.removeAllListeners()
})

/**
 * Outline a rectangle and print a caption anchored at its top-left corner.
 *
 * The outline is stroked in the inverse of the given colour; the caption is
 * filled in the colour itself at text size 20. On return the paint is left at
 * stroke width 1, text size 10, fill style and opaque black.
 *
 * @param {*} desc caption text
 * @param {*} rect rectangle to outline; its left/top position the caption
 * @param {*} colorStr colour string handed to colors.parseColor
 * @param {*} canvas canvas to draw on
 * @param {*} paint paint used for both the outline and the caption
 */
function drawRectAndText (desc, rect, colorStr, canvas, paint) {
  let parsed = colors.parseColor(colorStr)
  // Split the parsed colour into its 8-bit channels
  let red = parsed >> 16 & 0xff
  let green = parsed >> 8 & 0xff
  let blue = parsed & 0xff

  // Outline, stroked in the inverted colour
  paint.setStrokeWidth(1)
  paint.setStyle(Paint.Style.STROKE)
  paint.setARGB(255, 255 - red, 255 - green, 255 - blue)
  canvas.drawRect(rect, paint)

  // Caption, filled in the requested colour
  paint.setARGB(255, red, green, blue)
  paint.setStrokeWidth(1)
  paint.setTextSize(20)
  paint.setStyle(Paint.Style.FILL)
  canvas.drawText(desc, rect.left, rect.top, paint)

  // Leave the shared paint in a fixed state for the next caller
  paint.setTextSize(10)
  paint.setStrokeWidth(1)
  paint.setARGB(255, 0, 0, 0)
}

/**
 * Look up the status bar height.
 *
 * Resolves the Android dimension resource "status_bar_height" and returns its
 * pixel offset. When the resource is missing or resolves to a non-positive
 * value, the pixel offset of R.dimen.dimen_25dp is returned instead.
 * NOTE(review): the fallback depends on a global `R` being available in the
 * script runtime — confirm.
 *
 * @returns the status bar height as reported by getDimensionPixelOffset
 */
function getStatusBarHeightCompat () {
  let resources = context.getResources()
  let statusBarResId = resources.getIdentifier("status_bar_height", "dimen", "android")
  let height = statusBarResId > 0 ? resources.getDimensionPixelOffset(statusBarResId) : 0
  if (height <= 0) {
    return context.getResources().getDimensionPixelOffset(R.dimen.dimen_25dp)
  }
  return height
}