frzyc · tooflesswulf · Dec 28, 2023 · Dec 29, 2023 · Dec 29, 2023 · Dec 29, 2023
diff --git a/.yarn/cache/@types-node-npm-20.10.5-9464a4540b-e216b679f5.zip b/.yarn/cache/@types-node-npm-20.10.5-9464a4540b-e216b679f5.zip
diff --git a/.yarn/cache/flatbuffers-npm-1.12.0-51ee5c20bf-8a6461ec80.zip b/.yarn/cache/flatbuffers-npm-1.12.0-51ee5c20bf-8a6461ec80.zip
diff --git a/.yarn/cache/guid-typescript-npm-1.0.9-01fb89a1bf-829dd87866.zip b/.yarn/cache/guid-typescript-npm-1.0.9-01fb89a1bf-829dd87866.zip
diff --git a/.yarn/cache/long-npm-5.2.3-61dddb7586-885ede7c3d.zip b/.yarn/cache/long-npm-5.2.3-61dddb7586-885ede7c3d.zip
diff --git a/.yarn/cache/onnxruntime-common-npm-1.16.3-a42fa88d62-b8b83fd318.zip b/.yarn/cache/onnxruntime-common-npm-1.16.3-a42fa88d62-b8b83fd318.zip
diff --git a/.yarn/cache/onnxruntime-web-npm-1.16.3-3de219cbcb-e8b71733f9.zip b/.yarn/cache/onnxruntime-web-npm-1.16.3-3de219cbcb-e8b71733f9.zip
diff --git a/.yarn/cache/platform-npm-1.3.6-8c3cef9352-6f472a09c6.zip b/.yarn/cache/platform-npm-1.3.6-8c3cef9352-6f472a09c6.zip
diff --git a/.yarn/cache/protobufjs-npm-7.2.5-3439c554a5-3770a07211.zip b/.yarn/cache/protobufjs-npm-7.2.5-3439c554a5-3770a07211.zip
diff --git a/.yarn/cache/undici-types-npm-5.26.5-de4f7c7bb9-3192ef6f3f.zip b/.yarn/cache/undici-types-npm-5.26.5-de4f7c7bb9-3192ef6f3f.zip
diff --git a/apps/frontend/src/app/PageArtifact/index.tsx b/apps/frontend/src/app/PageArtifact/index.tsx
@@ -61,7 +61,7 @@ export default function PageArtifact() {
   const { database } = useContext(DatabaseContext)
   const artifactDisplayState = useDisplayArtifact()
 
-  const [showEditor, onShowEditor, onHideEditor] = useBoolState(false)
+  const [showEditor, onShowEditor, onHideEditor] = useBoolState(true)
 
   const [showDup, onShowDup, onHideDup] = useBoolState(false)
 

diff --git a/apps/frontend/src/assets/simplenet.onnx b/apps/frontend/src/assets/simplenet.onnx
diff --git a/libs/gi-art-scanner/src/lib/processImg.ts b/libs/gi-art-scanner/src/lib/processImg.ts
@@ -43,6 +43,8 @@
   parseSubstats,
 } from './parse'
 
+import { processEntryML } from './processImgML'
+
 export type Processed = {
   fileName: string
   imageURL: string
@@ -68,6 +70,21 @@
   const imageData = await urlToImageData(imageURL)
 
   const debugImgs = debug ? ({} as Record<string, string>) : undefined
+  if (true) {
+    const { artifact, texts, imageURL } = await processEntryML(
+      imageData,
+      textsFromImage,
+      debugImgs
+    )
+    return {
+      fileName: fName,
+      imageURL,
+      artifact,
+      texts,
+      debugImgs,
+    }
+  }
+
   const artifactCardImageData = verticallyCropArtifactCard(imageData, debugImgs)
   const artifactCardCanvas = imageDataToCanvas(artifactCardImageData)
 
@@ -335,7 +352,7 @@
   return cropped
 }
 
-function parseRarity(
+export function parseRarity(
   headerData: ImageData,
   debugImgs?: Record<string, string>
 ) {

diff --git a/libs/gi-art-scanner/src/lib/processImgML.ts b/libs/gi-art-scanner/src/lib/processImgML.ts
@@ -0,0 +1,241 @@
+import * as ort from 'onnxruntime-web'
+import { lockColor } from './consts'
+import {
+  crop,
+  imageDataToCanvas,
+  resize,
+  drawBox,
+  invert,
+  histogramAnalysis,
+  darkerColor,
+  lighterColor,
+} from '@genshin-optimizer/img-util'
+import { PSM } from 'tesseract.js'
+import { parseRarity } from './processImg'
+import {
+  parseMainStatKeys,
+  parseMainStatValues,
+  parseSetKeys,
+  parseSlotKeys,
+  parseSubstats,
+} from './parse'
+import { findBestArtifact } from './findBestArtifact'
+
+type Box = { // rectangle as produced by getBox and consumed by box2CropOption/drawBox
+  x: number // origin on the horizontal axis (becomes x1 in box2CropOption)
+  y: number // origin on the vertical axis (becomes y1 in box2CropOption)
+  w: number // horizontal extent; box2CropOption uses x + w as x2
+  h: number // vertical extent; box2CropOption uses y + h as y2
+}
+type MLBoxes = { // one Box per detector output; doInference decodes them by index
+  title: Box // output index 0
+  slot: Box // output index 1
+  mainstat: Box // output index 2
+  level: Box // output index 3
+  rarity: Box // output index 4
+  substats: Box // output index 5; doInference overrides w with lock.x - substats.x
+  set: Box // output index 6
+  lock: Box // output index 7
+  bbox: Box // output index 8; processEntryML crops its second pass to this box
+}
+
+// Decodes box `i` from `result`: four consecutive values read as
+// (x1, y1, x2, y2), with x scaled by `width` and y by `height`. `offset` (x1/y1,
+// each defaulting to 0) shifts the origin only; w and h are left unshifted.
+function getBox(
+  result: ort.TypedTensor<'float32'>,
+  height: number,
+  width: number,
+  i: number,
+  offset?: { x1?: number; y1?: number }
+): Box {
+  const left = result.data[4 * i] * width
+  const top = result.data[4 * i + 1] * height
+  const w = result.data[4 * i + 2] * width - left
+  const h = result.data[4 * i + 3] * height - top
+  return { x: left + (offset?.x1 ?? 0), y: top + (offset?.y1 ?? 0), w, h }
+}
+// Scales the box's w and h by (1 + pad) and moves its origin back by half of
+// the added size, clamping the new x and y so they never drop below 0.
+function padBox(box: Box, pad: number): Box {
+  const grow = 1 + pad
+  const x = Math.max(box.x - (pad * box.w) / 2, 0)
+  const y = Math.max(box.y - (pad * box.h) / 2, 0)
+  return { x, y, w: box.w * grow, h: box.h * grow }
+}
+// Converts a Box into crop corners: (x1, y1) is the box origin and (x2, y2) is
+// the origin plus w/h. A truthy `pad` first enlarges the box via padBox; an
+// omitted or zero `pad` uses the box as given.
+function box2CropOption(box: Box, pad?: number) {
+  const { x, y, w, h } = pad ? padBox(box, pad) : box
+  const x2 = x + w
+  const y2 = y + h
+  return { x1: x, y1: y, x2, y2 }
+}
+
+// Converts a 200x200 RGBA image into the network's input layout: a
+// channel-major Float32Array of length 3 * 200 * 200 holding R, G and B only
+// (alpha is skipped). Each byte is divided by 255, then the channel's `mu` is
+// subtracted and the result is divided by the channel's `std`.
+function prepareForOnnx(imageData: ImageData): Float32Array {
+  const side = 200
+  const plane = side * side
+  const stats = [
+    { mu: 0.485, std: 0.229 },
+    { mu: 0.456, std: 0.224 },
+    { mu: 0.406, std: 0.225 },
+  ]
+  const rowStride = 4 * imageData.width // bytes per source row (RGBA)
+  const packed = new Float32Array(3 * plane) // starts zero-filled
+
+  stats.forEach(({ mu, std }, channel) => {
+    for (let row = 0; row < side; row++) {
+      for (let col = 0; col < side; col++) {
+        const byte = imageData.data[channel + rowStride * row + 4 * col]
+        packed[channel * plane + row * side + col] = (byte / 255.0 - mu) / std
+      }
+    }
+  })
+  return packed
+}
+
+// Runs one detection pass: crops `imageData` with `cropOptions`, resizes the
+// crop to 200x200, feeds it to `session` as `input1`, and decodes `output1`
+// into nine boxes scaled to the crop size and shifted by the crop's x1/y1.
+async function doInference(
+  imageData: ImageData,
+  session: ort.InferenceSession,
+  cropOptions: {
+    x1?: number
+    x2?: number
+    y1?: number
+    y2?: number
+  },
+  debugImgs?: Record<string, string>
+): Promise<MLBoxes> {
+  const cropped = crop(imageDataToCanvas(imageData), cropOptions)
+  const netInput = resize(cropped, { width: 200, height: 200 })
+  const tensorData = prepareForOnnx(netInput)
+  if (debugImgs)
+    debugImgs['MLInput'] = imageDataToCanvas(netInput).toDataURL()
+
+  const input1 = new ort.Tensor('float32', tensorData, [1, 3, 200, 200])
+  const outputs = await session.run({ input1 })
+  const raw = outputs['output1'] as ort.TypedTensor<'float32'>
+  const boxAt = (index: number): Box =>
+    getBox(raw, cropped.height, cropped.width, index, cropOptions)
+  const boxes: MLBoxes = {
+    title: boxAt(0),
+    slot: boxAt(1),
+    mainstat: boxAt(2),
+    level: boxAt(3),
+    rarity: boxAt(4),
+    substats: boxAt(5),
+    set: boxAt(6),
+    lock: boxAt(7),
+    bbox: boxAt(8),
+  }
+  // Manually fix inconsistent substat box width
+  boxes.substats.w = boxes.lock.x - boxes.substats.x
+  return boxes
+}
+
+/**
+ * Detects the artifact card with the ONNX box model (a full-image pass, then
+ * a pass on the first `bbox` padded by 0.2), OCRs each detected region via
+ * `textsFromImage`, and resolves the best match with findBestArtifact.
+ * @param debugImgs if given, is filled with data URLs of intermediate images
+ * @returns `artifact`, `texts`, and `imageURL` (the screenshot with the boxes
+ *   drawn on it, cropped to the padded first-pass bbox)
+ * @throws rejects if session creation, inference or any OCR call rejects
+ */
+export async function processEntryML(
+  imageDataRaw: ImageData,
+  textsFromImage: (
+    imageData: ImageData,
+    options?: object | undefined
+  ) => Promise<string[]>,
+  debugImgs?: Record<string, string>
+) {
+  // const session = await ort.InferenceSession.create('https://github.com/tooflesswulf/genshin-scanner/raw/main/onnx/simplenet.onnx')
+  const session = await ort.InferenceSession.create('./assets/simplenet.onnx', {
+    executionProviders: ['webgl'],
+  })
+
+  const mlBoxes0 = await doInference(imageDataRaw, session, {}, debugImgs)
+  const cardCrop = box2CropOption(mlBoxes0.bbox, 0.2)
+  const mlBoxes = await doInference(imageDataRaw, session, cardCrop, debugImgs)
+
+  const rawCanvas = imageDataToCanvas(imageDataRaw)
+  const cropOf = (box: Box) => crop(rawCanvas, box2CropOption(box, 0.1))
+  const titleCrop = cropOf(mlBoxes.title)
+  const slotCrop = cropOf(mlBoxes.slot)
+  const levelCrop = invert(cropOf(mlBoxes.level))
+  const mainstatCrop = invert(cropOf(mlBoxes.mainstat))
+  const substatCrop = cropOf(mlBoxes.substats)
+  const setCrop = cropOf(mlBoxes.set)
+  const lockCrop = cropOf(mlBoxes.lock)
+  const rarityCrop = cropOf(mlBoxes.rarity)
+
+  // Start all OCR requests at once and track them with a single Promise.all so
+  // that none (including the otherwise unused title/level) is left un-awaited.
+  const ocr = Promise.all([
+    textsFromImage(titleCrop),
+    textsFromImage(slotCrop),
+    textsFromImage(levelCrop),
+    textsFromImage(mainstatCrop, { tessedit_pageseg_mode: PSM.SPARSE_TEXT }),
+    textsFromImage(substatCrop),
+    textsFromImage(setCrop),
+  ])
+
+  const lockHisto = histogramAnalysis(
+    lockCrop,
+    darkerColor(lockColor),
+    lighterColor(lockColor)
+  )
+  const locked = lockHisto.filter((v) => v > 5).length > 5
+  const rarity = parseRarity(rarityCrop, debugImgs)
+  const [, slotText, levelText, mainstatText, substatText, setText] = await ocr
+
+  const [artifact, texts] = findBestArtifact(
+    new Set([rarity]),
+    parseSetKeys(setText),
+    parseSlotKeys(slotText),
+    parseSubstats(substatText),
+    parseMainStatKeys(mainstatText),
+    parseMainStatValues(mainstatText),
+    '',
+    locked
+  )
+
+  const canvasRaw = imageDataToCanvas(imageDataRaw)
+  drawBox(canvasRaw, mlBoxes.title, { r: 31, g: 119, b: 180, a: 80 })
+  drawBox(canvasRaw, mlBoxes.slot, { r: 255, g: 127, b: 14, a: 80 })
+  drawBox(canvasRaw, mlBoxes.mainstat, { r: 44, g: 160, b: 44, a: 80 })
+  drawBox(canvasRaw, mlBoxes.level, { r: 214, g: 39, b: 40, a: 80 })
+  drawBox(canvasRaw, mlBoxes.rarity, { r: 128, g: 103, b: 189, a: 80 })
+  drawBox(canvasRaw, mlBoxes.substats, { r: 140, g: 86, b: 75, a: 80 })
+  drawBox(canvasRaw, mlBoxes.set, { r: 227, g: 119, b: 194, a: 80 })
+  drawBox(canvasRaw, mlBoxes.lock, { r: 188, g: 189, b: 34, a: 80 })
+  drawBox(canvasRaw, mlBoxes.bbox, { r: 127, g: 127, b: 127, a: 60 })
+  if (debugImgs) {
+    debugImgs['MLBoxesFull'] = canvasRaw.toDataURL()
+    debugImgs['slotCrop'] = imageDataToCanvas(slotCrop).toDataURL()
+    debugImgs['levelCrop'] = imageDataToCanvas(levelCrop).toDataURL()
+    debugImgs['mainstatCrop'] = imageDataToCanvas(mainstatCrop).toDataURL()
+    debugImgs['substatCrop'] = imageDataToCanvas(substatCrop).toDataURL()
+    debugImgs['setCrop'] = imageDataToCanvas(setCrop).toDataURL()
+    debugImgs['lockCrop'] = imageDataToCanvas(lockCrop).toDataURL()
+    debugImgs['rarityCrop'] = imageDataToCanvas(rarityCrop).toDataURL()
+  }
+  const canvas = imageDataToCanvas(crop(canvasRaw, cardCrop))
+  console.log('DETECTION: ', { artifact, texts })
+  console.log('TEXT:', {
+    slotText,
+    levelText,
+    mainstatText,
+    substatText,
+    setText,
+  })
+  return { artifact, texts, imageURL: canvas.toDataURL() }
+}
diff --git a/libs/img-util/src/canvas.ts b/libs/img-util/src/canvas.ts
@@ -17,6 +17,20 @@ export function drawline(
   return canvas
 }
 
+// Fills the rectangle (x, y, w, h) on `canvas` with `color` and returns the
+// canvas. `color.a` is read on a 0-255 scale; a missing alpha means opaque,
+// while an explicit 0 is kept as fully transparent.
+export function drawBox(
+  canvas: HTMLCanvasElement,
+  { x, y, w, h }: { x: number; y: number; w: number; h: number },
+  color: Color
+) {
+  const ctx = canvas.getContext('2d')!
+  ctx.fillStyle = `rgba(${color.r},${color.g},${color.b},${(color.a ?? 255) / 255})`
+  ctx.fillRect(x, y, w, h)
+  return canvas
+}
+
 export function drawHistogram(
   canvas: HTMLCanvasElement,
   histogram: number[],

diff --git a/libs/img-util/src/imageData.ts b/libs/img-util/src/imageData.ts
@@ -47,6 +47,67 @@ export function crop(srcCanvas: HTMLCanvasElement, options: CropOptions) {
   return ctx.getImageData(x1, y1, x2 - x1, y2 - y1)
 }
 
+// Bilinearly samples byte channel `i` of `image` at the fractional pixel
+// position (x, y). The position and its neighbour indices are clamped to the
+// image, so sampling on the last column or row stays inside the buffer.
+function interpolate_bilinear(
+  image: ImageData,
+  x: number,
+  y: number,
+  i: number
+) {
+  const cx = Math.min(Math.max(x, 0), image.width - 1)
+  const cy = Math.min(Math.max(y, 0), image.height - 1)
+  const x1 = Math.floor(cx),
+    x2 = Math.min(x1 + 1, image.width - 1)
+  const y1 = Math.floor(cy),
+    y2 = Math.min(y1 + 1, image.height - 1)
+  const px = (c: number, r: number) => image.data[i + 4 * (c + image.width * r)]
+  const top = (x1 + 1 - cx) * px(x1, y1) + (cx - x1) * px(x2, y1)
+  const bottom = (x1 + 1 - cx) * px(x1, y2) + (cx - x1) * px(x2, y2)
+  return (y1 + 1 - cy) * top + (cy - y1) * bottom
+}
+// Resamples `imageData` to `options.width` x `options.height` (each defaults to
+// the source size) with interpolate_bilinear, mapping corner pixels onto corner
+// pixels. When an axis scale is 0 or NaN (a target dimension of 1), that axis
+// samples source coordinate 0 instead of dividing by it.
+export function resize(
+  imageData: ImageData,
+  options: { width?: number; height?: number }
+): ImageData {
+  const { width = imageData.width, height = imageData.height } = options
+
+  const dataBuffer = new Uint8ClampedArray(width * height * 4)
+  const sx = (width - 1) / (imageData.width - 1)
+  const sy = (height - 1) / (imageData.height - 1)
+  for (let y = 0; y < height; y++) {
+    const srcY = sy > 0 ? y / sy : 0
+    for (let x = 0; x < width; x++) {
+      const srcX = sx > 0 ? x / sx : 0
+      for (let i = 0; i < 4; i++) {
+        dataBuffer[(y * width + x) * 4 + i] =
+          interpolate_bilinear(imageData, srcX, srcY, i)
+      }
+    }
+  }
+  return new ImageData(dataBuffer, width, height)
+}
+// Returns a new ImageData of the same size whose R, G and B bytes are
+// 255 minus the source bytes; every alpha byte is copied through unchanged.
+export function invert(imageData: ImageData) {
+  const { width, height, data } = imageData
+  const inverted = new Uint8ClampedArray(width * height * 4)
+
+  for (let px = 0; px < inverted.length; px += 4) {
+    inverted[px] = 255 - data[px]
+    inverted[px + 1] = 255 - data[px + 1]
+    inverted[px + 2] = 255 - data[px + 2]
+    inverted[px + 3] = data[px + 3]
+  }
+
+  return new ImageData(inverted, width, height)
+}
+
 export const fileToURL = (file: File): Promise<string> =>
   new Promise((resolve) => {
     const reader = new FileReader()

diff --git a/package.json b/package.json
@@ -116,6 +116,7 @@
     "jsonwebtoken": "^9.0.2",
     "next": "14.0.3",
     "next-auth": "^4.23.2",
+    "onnxruntime-web": "^1.16.3",
     "passport": "^0.6.0",
     "passport-jwt": "^4.0.1",
     "react": "18.2.0",