# labelExtraction.py

import os
import re
import glob
import sys
import time
import random
import math
import time
import torch
import numpy as np
from PIL import Image
from transform import Colorize

# Pattern that cuts a file name into alternating text / digit-run pieces.
numbers = re.compile(r'(\d+)')


def numericalSort(value):
    """
    Sort key for file names (strings) that orders embedded digit runs by
    their numeric value instead of character by character.

    The name is split around every run of digits; the digit runs (which land
    on the odd positions of the split result) are converted to int.
    """
    pieces = numbers.split(value)
    for idx in range(1, len(pieces), 2):
        pieces[idx] = int(pieces[idx])
    return pieces

class DataSetExtractor():
    """
    Converts the segmentation mask text files of a ground-truth directory
    into label images.

    The directory is read for:
      * "*.txt" mask files (space-separated integer pixel values, one image
        row per line),
      * a "*.leg" legend file mapping pixel-value ranges to tags,
      * "LabelConfig.cfg" with one "tag:label" entry per line.
    """

    def __init__(self, pathToGroundTruth):
        """
        Collects the mask and legend file names found in pathToGroundTruth
        and loads the label configuration and the legend.
        """
        self.pathToGroundTruth = pathToGroundTruth

        # Mask names are ordered numerically so "2.txt" comes before "10.txt".
        self.maskNames = sorted(self.__listFileNames("*.txt"), key=numericalSort)
        self.legendFileName = self.__listFileNames("*.leg")

        self.labelDict = self.__loadLabelConfig()
        self.legendDict = self.__readLegendFile()

        self.labelArray = None

    def extractDataSet(self, outputDir="E:/RoboCup/YOLOBU/Labels/Train/"):
        """
        Converts every mask file of the ground-truth directory into an 8-bit
        label image and saves it as "<mask name up to the first dot>.png" in
        outputDir. The index of each processed mask is printed as progress.
        """
        for i, maskName in enumerate(self.maskNames):
            # Read in label file
            maskArr = self.__processMask(maskName).astype('uint8')
            # Convert it to image
            img = Image.fromarray(maskArr)
            # Save image
            name = maskName.split(".")[0]
            img.save(os.path.join(outputDir, name + ".png"))
            print(i)

    def __listFileNames(self, pattern):
        """
        Returns the bare names of the files in the ground-truth directory
        that match the given glob pattern.
        """
        # glob.escape keeps special characters in the directory path literal.
        searchPattern = os.path.join(glob.escape(self.pathToGroundTruth), pattern)
        return [os.path.basename(path) for path in glob.glob(searchPattern)]

    def __loadLabelConfig(self):
        """
        Reads LabelConfig.cfg ("tag:label" per line) into a dictionary that
        maps tag -> label string. Lines without a ":" (e.g. blank lines) are
        skipped.
        """
        labelDict = {}
        configPath = os.path.join(self.pathToGroundTruth, "LabelConfig.cfg")
        with open(configPath) as configFile:
            for line in configFile:
                fields = line.strip().split(":")
                if len(fields) < 2:
                    continue
                labelDict[fields[0].strip()] = fields[1].strip()

        return labelDict

    def __filterMask(self, mask):
        """
        Smooths a 2d label mask with a neighbourhood vote.

        For every pixel a histogram of the labels found at row/column offsets
        -2..1 is built. The pixel takes the most frequent label when that
        label has at least 15 votes or when the pixel's own label has fewer
        than 7 votes; otherwise it keeps its label.

        The mask must hold integer labels in the range 0..4 (histogram size).
        """
        # NOTE(review): the window covers offsets -2..1 (4x4), not a symmetric
        # 5x5 neighbourhood - confirm this matches the 15 / 7 vote thresholds.
        numRows, numCols = mask.shape
        newMask = np.empty((numRows, numCols), dtype='uint8')
        for row in range(numRows):
            for col in range(numCols):
                hist = np.zeros(5)
                for i in range(-2, 2, 1):
                    if row + i < 0 or row + i >= numRows:
                        continue
                    for j in range(-2, 2, 1):
                        if col + j < 0 or col + j >= numCols:
                            continue
                        hist[mask[(row + i, col + j)]] += 1
                maxVal = np.amax(hist)
                maxIdx = np.argmax(hist)
                if maxVal >= 15 or hist[mask[(row, col)]] < 7:
                    newMask[(row, col)] = maxIdx
                else:
                    newMask[(row, col)] = mask[(row, col)]

        return newMask

    def __processMask(self, maskName, imageHeight=480, imageWidth=640):
        """
        Processes given maskFile into 2d-array structure.

        Reads imageHeight lines with imageWidth space-separated integers each
        and returns an (imageHeight, imageWidth) array in which every
        positive pixel value is replaced by its label; all other pixels are 0.
        """
        maskArray = np.zeros((imageHeight, imageWidth))
        # Pixel value -> label. Avoids repeating the legend search for every
        # pixel, since a mask only contains a few distinct values.
        labelCache = {}
        maskPath = os.path.join(self.pathToGroundTruth, maskName)
        with open(maskPath, "r") as currFile:
            for i in range(imageHeight):
                currLineData = currFile.readline().split(" ")
                for j in range(imageWidth):
                    currPix = int(currLineData[j])
                    if currPix > 0:
                        if currPix not in labelCache:
                            labelCache[currPix] = self.__getLabel(currPix)
                        maskArray[(i, j)] = labelCache[currPix]
        return maskArray

    def __loadFileNames(self, filesPath, extention, isSorted=True, sortingCriterion=None):
        """
        Load all file names in the given directory with the given extention into a list.

        Only the directory itself is searched (no sub-directories). When
        isSorted is set the list is sorted, using sortingCriterion as the
        sort key if one is given.
        """
        print("loading files: " + extention)
        # The first os.walk entry describes filesPath itself: (path, folders, files).
        allFileList = next(os.walk(filesPath), (filesPath, [], []))[2]
        fileList = [name for name in allFileList if name.endswith(extention)]

        if isSorted:
            return sorted(fileList, key=sortingCriterion)

        return fileList

    def __readLegendFile(self):
        """
        Loads the legend file generated by UETrainingSetGenerator into a
        dictionary structure.

        The first line of the file holds whitespace-separated "count:tag"
        entries. The counts are accumulated, and each tag is stored under the
        running total (as a string) reached by its entry.

        Raises IndexError when the directory contains no "*.leg" file.
        """
        if not self.legendFileName:
            raise IndexError("no '*.leg' legend file found in " + str(self.pathToGroundTruth))

        legendDict = {}
        legendPath = os.path.join(self.pathToGroundTruth, self.legendFileName[0])
        with open(legendPath, "r") as currFile:
            currLegendIndex = 0
            # split() without argument also drops the trailing newline and
            # any repeated or trailing blanks.
            for entry in currFile.readline().split():
                fields = entry.split(":")
                if len(fields) < 2:
                    continue

                currLegendIndex += int(fields[0])
                legendDict[str(currLegendIndex)] = fields[1]
        return legendDict

    def __getTag(self, key):
        """
        Returns the tag of the smallest legend key that is >= key, or None
        when key lies above every legend key.
        """
        for legendKey in sorted(map(int, self.legendDict.keys())):
            if key - 1 < legendKey:
                return self.legendDict[str(legendKey)]
        return None

    def __getLabel(self, key):
        """
        Returns the integer label configured for the tag that the legend
        assigns to the pixel value key.

        Raises KeyError when the legend does not cover key or the tag has no
        entry in the label configuration.
        """
        currTag = self.__getTag(key)
        if currTag is None:
            raise KeyError("pixel value " + str(key) + " is not covered by the legend")
        return int(self.labelDict[currTag])

# Script part: convert all masks of the training ground-truth directory.
groundTruthDir = "E:/RoboCup/YOLOBU/Masks/Train/"
dataSetExtractor = DataSetExtractor(groundTruthDir)
dataSetExtractor.extractDataSet()