Support Unicode for saving and loading Pascal XML format

Use lxml to pretty-print the XML instead of minidom, because minidom does not support Unicode
Save the XML with Unicode content
Add tests dir to collect more images for unit tests in the future
This commit is contained in:
tzutalin 2016-12-02 11:23:40 +08:00
parent 930d111eb8
commit 50a079edc4
4 changed files with 47 additions and 44 deletions

View File

@ -514,7 +514,7 @@ class MainWindow(QMainWindow, WindowMixin):
if text is not None:
item.setText(text)
self.setDirty()
# Tzutalin 20160906 : Add file list and dock to move faster
def fileitemDoubleClicked(self, item=None):
currIndex = self.mImgList.index(str(item.text()))
@ -538,7 +538,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected)
print 'shapeSelectionChanged'
def addLabel(self, shape):
item = QListWidgetItem(shape.label)
@ -688,7 +687,7 @@ class MainWindow(QMainWindow, WindowMixin):
filename = self.settings['filename']
filename = unicode(filename)
# Tzutalin 20160906 : Add file list and dock to move faster
# Tzutalin 20160906 : Add file list and dock to move faster
# Highlight the file item
if filename and self.fileListWidget.count() > 0:
index = self.mImgList.index(filename)
@ -736,8 +735,8 @@ class MainWindow(QMainWindow, WindowMixin):
## Label xml file and show bound box according to its filename
if self.usingPascalVocFormat is True and \
self.defaultSaveDir is not None:
basename = os.path.basename(os.path.splitext(self.filename)[0])
xmlPath = os.path.join(self.defaultSaveDir, basename + '.xml')
basename = os.path.basename(os.path.splitext(self.filename)[0]) + '.xml'
xmlPath = os.path.join(self.defaultSaveDir, basename)
self.loadPascalXMLByFilename(xmlPath)
return True
@ -1069,13 +1068,13 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.labelHist.append(line)
def loadPascalXMLByFilename(self, filename):
def loadPascalXMLByFilename(self, xmlPath):
if self.filename is None:
return
if os.path.exists(filename) is False:
if os.path.isfile(xmlPath) is False:
return
tVocParseReader = PascalVocReader(filename)
tVocParseReader = PascalVocReader(xmlPath)
shapes = tVocParseReader.getShapes()
self.loadLabels(shapes)

View File

@ -1,10 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-
import _init_path
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from xml.dom import minidom
from lxml import etree
class PascalVocWriter:
def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
self.foldername = foldername
self.filename = filename
@ -17,9 +21,9 @@ class PascalVocWriter:
"""
Return a pretty-printed XML string for the Element.
"""
rough_string = ElementTree.tostring(elem,'utf8')
reparsed = minidom.parseString(rough_string)
return reparsed.toprettyxml(indent="\t")
rough_string = ElementTree.tostring(elem, 'utf8')
root = etree.fromstring(rough_string)
return etree.tostring(root, pretty_print=True)
def genXML(self):
"""
@ -30,46 +34,45 @@ class PascalVocWriter:
self.foldername is None or \
self.imgSize is None or \
len(self.boxlist) <= 0:
return None
return None
top = Element('annotation')
folder = SubElement(top,'folder')
folder = SubElement(top, 'folder')
folder.text = self.foldername
filename = SubElement(top,'filename')
filename = SubElement(top, 'filename')
filename.text = self.filename
localImgPath = SubElement(top,'path')
localImgPath = SubElement(top, 'path')
localImgPath.text = self.localImgPath
source = SubElement(top,'source')
database = SubElement(source,'database')
source = SubElement(top, 'source')
database = SubElement(source, 'database')
database.text = self.databaseSrc
size_part = SubElement(top,'size')
width = SubElement(size_part,'width')
height = SubElement(size_part,'height')
depth = SubElement(size_part,'depth')
size_part = SubElement(top, 'size')
width = SubElement(size_part, 'width')
height = SubElement(size_part, 'height')
depth = SubElement(size_part, 'depth')
width.text = str(self.imgSize[1])
height.text = str(self.imgSize[0])
if len(self.imgSize)==3:
if len(self.imgSize) == 3:
depth.text = str(self.imgSize[2])
else:
depth.text = '1'
segmented = SubElement(top,'segmented')
segmented.text ='0'
segmented = SubElement(top, 'segmented')
segmented.text = '0'
return top
def addBndBox(self, xmin, ymin, xmax, ymax, name):
bndbox = {'xmin':xmin, 'ymin':ymin, 'xmax':xmax, 'ymax':ymax}
bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
bndbox['name'] = name
self.boxlist.append(bndbox);
self.boxlist.append(bndbox)
def appendObjects(self, top):
for each_object in self.boxlist:
object_item = SubElement(top,'object')
object_item = SubElement(top, 'object')
name = SubElement(object_item, 'name')
name.text = str(each_object['name'])
pose = SubElement(object_item, 'pose')
@ -88,25 +91,26 @@ class PascalVocWriter:
ymax = SubElement(bndbox, 'ymax')
ymax.text = str(each_object['ymax'])
def save(self, targetFile = None):
def save(self, targetFile=None):
root = self.genXML()
self.appendObjects(root)
out_file = None
if targetFile is None:
out_file = open(self.filename + '.xml','w')
out_file = open(self.filename + '.xml', 'w')
else:
out_file = open(targetFile, 'w')
out_file.write(self.prettify(root))
prettifyResult = self.prettify(root)
out_file.write(prettifyResult)
out_file.close()
class PascalVocReader:
def __init__(self, filepath):
## shapes type:
## [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
self.shapes=[]
# shapes type:
# [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
self.shapes = []
self.filepath = filepath
self.parseXML()
@ -118,22 +122,23 @@ class PascalVocReader:
ymin = rect[1]
xmax = rect[2]
ymax = rect[3]
points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
self.shapes.append((label, points, None, None))
def parseXML(self):
assert self.filepath.endswith('.xml'), "Unsupport file format"
xmltree = ElementTree.parse(self.filepath).getroot()
parser = etree.XMLParser(encoding='utf-8')
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
filename = xmltree.find('filename').text
for object_iter in xmltree.findall('object'):
rects = []
bndbox = object_iter.find("bndbox")
rects.append([int(it.text) for it in bndbox])
label = object_iter.find('name').text
rects = []
bndbox = object_iter.find("bndbox")
rects.append([int(it.text) for it in bndbox])
label = object_iter.find('name').text
for rect in rects:
self.addShape(label, rect)
for rect in rects:
self.addShape(label, rect)
return True
@ -146,4 +151,3 @@ tmp.addBndBox(10,10,20,30,'chair')
tmp.addBndBox(1,1,600,600,'car')
tmp.save()
"""

BIN
tests/test.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

BIN
tests/臉書.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 B