-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtxt2xml.py
128 lines (112 loc) · 4.1 KB
/
txt2xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
import os
import sys, getopt
import xml.etree.ElementTree as ET
from PIL import Image
from xml.dom import minidom
def loadNames(filepath):
    """Read class names from a text file, one name per line.

    Each line is stripped of surrounding whitespace and appended to the
    result in file order, so the list index of a name equals its zero-based
    line number. Blank lines are kept (as empty strings) so that this
    index-to-line correspondence is preserved.

    The resulting list is also printed to stdout.

    Args:
        filepath: Path of the names file to read.

    Returns:
        A list of the stripped lines of the file.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filepath, 'r') as names_fh:
        classes = [line.strip() for line in names_fh]
    print(classes)
    return classes
def convert(size, strs):
    """Turn a fractional centre/size box into integer corner coordinates.

    Args:
        size: Sequence whose first two items are the image width and height.
        strs: Sequence of fields from one label line. Items 1..4 are parsed
            as floats: centre x, centre y, box width and box height, each
            multiplied by the matching image dimension (item 0 is ignored
            here).

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax)`` of ints. Every coordinate is
        shifted by +1 before truncation with ``int()``
        (presumably to produce 1-based pixel coordinates - confirm with the
        consumer of the XML).
    """
    center_x, center_y, frac_w, frac_h = [float(strs[i]) for i in (1, 2, 3, 4)]
    img_w, img_h = size[0], size[1]

    # Half extents of the box in pixels.
    half_w = frac_w * img_w / 2
    half_h = frac_h * img_h / 2

    # Box centre in pixels, already including the +1 shift.
    anchor_x = center_x * img_w + 1
    anchor_y = center_y * img_h + 1

    return (
        int(anchor_x - half_w),
        int(anchor_y - half_h),
        int(anchor_x + half_w),
        int(anchor_y + half_h),
    )
def prettyXml(filepath):
    """Re-write an XML file in place with indentation and line breaks.

    The file is parsed with ``minidom`` and then written back to the same
    path as UTF-8 text.

    Args:
        filepath: Path of the XML file to reformat; it is overwritten.
    """
    # Parse first: opening the file with mode 'w' below truncates it.
    doc = minidom.parse(filepath)
    # The context manager guarantees the output is flushed and closed even
    # if writexml raises part-way through.
    with open(filepath, 'w', encoding='utf-8') as f:
        # addindent is the indentation for child elements, newl='\n' puts a
        # line break between elements, and encoding='utf-8' sets the encoding
        # named in the generated declaration
        # (<?xml version="1.0" encoding="utf-8"?>).
        doc.writexml(f, addindent=' ', newl='\n', encoding='utf-8')
# Script body: for every label .txt file in input_dir that has a matching
# .jpg in images_dir, build an 'annotation' XML tree (filename, path, source,
# size, and one 'object' with a 'bndbox' per label line) and write it to
# output_dir as a pretty-printed .xml file.
#
# Usage:
#   input_dir  = '/txt'     directory containing the label .txt files
#   output_dir = '/xml'     directory the .xml files are written to
#   images_dir = '/images'  directory containing the image for each .txt file
#   names_file = '/.names'  file listing the dataset class names
usage_text = 'usage: python txt2xml.py -i input_dir -o output_dir -p images_dir -n names_file'
if len(sys.argv) < 4:
    print(usage_text)
    sys.exit(1)

try:
    options, args = getopt.getopt(sys.argv[1:], "i:o:p:n:", ["input=", "output=", "images=", "names="])
except getopt.GetoptError as err:
    # Report what was wrong and fail with a non-zero status instead of
    # exiting silently.
    print(err)
    print(usage_text)
    sys.exit(1)

# Pre-initialise so a missing option is reported as a usage error rather
# than surfacing later as a NameError.
input_dir = output_dir = images_dir = names_file = None
for name, value in options:
    if name in ('-i', '--input'):
        input_dir = value
    if name in ('-o', '--output'):
        output_dir = value
    if name in ('-p', '--images'):
        images_dir = value
    if name in ('-n', '--names'):
        names_file = value
if None in (input_dir, output_dir, images_dir, names_file):
    print(usage_text)
    sys.exit(1)

os.makedirs(output_dir, exist_ok=True)
namesArray = loadNames(names_file)
txtFiles = os.listdir(input_dir)
for temp in txtFiles:
    if not temp.endswith('.txt'):
        continue
    # Swap only the trailing extension; str.replace would also rewrite any
    # earlier '.txt' occurring inside the file name.
    stem = temp[:-len('.txt')]
    infile = os.path.join(input_dir, temp)
    print('Process: %s' % (infile))
    imgfile = os.path.join(images_dir, stem + '.jpg')
    if not os.path.exists(imgfile):
        print('Skip, no image file: %s' % (imgfile))
        continue
    # Only the dimensions are needed; close the image file right away.
    with Image.open(imgfile) as img:
        (w, h) = img.size

    root = ET.Element('annotation')
    # skip 'folder'
    filename = ET.SubElement(root, 'filename')
    filename.text = stem + '.jpg'
    path = ET.SubElement(root, 'path')
    path.text = imgfile
    source = ET.SubElement(root, 'source')
    database = ET.SubElement(source, 'database')
    database.text = 'Unknow'
    size = ET.SubElement(root, 'size')
    width = ET.SubElement(size, 'width')
    width.text = str(w)
    height = ET.SubElement(size, 'height')
    height.text = str(h)
    depth = ET.SubElement(size, 'depth')
    depth.text = '3'
    segmented = ET.SubElement(root, 'segmented')
    segmented.text = '0'

    with open(infile, 'r') as label_fh:
        for line in label_fh:
            # split() tolerates tabs, repeated spaces and the trailing
            # newline; blank lines yield no fields and are skipped.
            array = line.split()
            if not array:
                continue
            # First field is the index into the class-name list.
            nameId = int(array[0])
            obj = ET.SubElement(root, 'object')
            name = ET.SubElement(obj, 'name')
            name.text = namesArray[nameId]
            pos = ET.SubElement(obj, 'pose')
            pos.text = 'Unspecified'
            truncat = ET.SubElement(obj, 'truncated')
            truncat.text = '0'
            difficult = ET.SubElement(obj, 'difficult')
            difficult.text = '0'
            bndbox = ET.SubElement(obj, 'bndbox')
            (x_lt, y_lt, x_rb, y_rb) = convert((w, h), array)
            xmin = ET.SubElement(bndbox, 'xmin')
            ymin = ET.SubElement(bndbox, 'ymin')
            xmax = ET.SubElement(bndbox, 'xmax')
            ymax = ET.SubElement(bndbox, 'ymax')
            xmin.text = str(x_lt)
            ymin.text = str(y_lt)
            xmax.text = str(x_rb)
            ymax.text = str(y_rb)

    tree = ET.ElementTree(root)
    outfile = os.path.join(output_dir, stem + '.xml')
    tree.write(outfile)
    # ElementTree writes everything on one line; re-format it for readability.
    prettyXml(outfile)