Support Unicode for saving and loading Pascal XML format

Use lxml to pretty-print the XML instead of minidom, because minidom does not support Unicode
Save the XML with Unicode content
Add a tests directory to collect more images for future unit tests
This commit is contained in:
tzutalin 2016-12-02 11:23:40 +08:00
parent 930d111eb8
commit 50a079edc4
4 changed files with 47 additions and 44 deletions

View File

@ -514,7 +514,7 @@ class MainWindow(QMainWindow, WindowMixin):
if text is not None: if text is not None:
item.setText(text) item.setText(text)
self.setDirty() self.setDirty()
# Tzutalin 20160906 : Add file list and dock to move faster # Tzutalin 20160906 : Add file list and dock to move faster
def fileitemDoubleClicked(self, item=None): def fileitemDoubleClicked(self, item=None):
currIndex = self.mImgList.index(str(item.text())) currIndex = self.mImgList.index(str(item.text()))
@ -538,7 +538,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected) self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected) self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected) self.actions.shapeFillColor.setEnabled(selected)
print 'shapeSelectionChanged'
def addLabel(self, shape): def addLabel(self, shape):
item = QListWidgetItem(shape.label) item = QListWidgetItem(shape.label)
@ -688,7 +687,7 @@ class MainWindow(QMainWindow, WindowMixin):
filename = self.settings['filename'] filename = self.settings['filename']
filename = unicode(filename) filename = unicode(filename)
# Tzutalin 20160906 : Add file list and dock to move faster # Tzutalin 20160906 : Add file list and dock to move faster
# Highlight the file item # Highlight the file item
if filename and self.fileListWidget.count() > 0: if filename and self.fileListWidget.count() > 0:
index = self.mImgList.index(filename) index = self.mImgList.index(filename)
@ -736,8 +735,8 @@ class MainWindow(QMainWindow, WindowMixin):
## Label xml file and show bound box according to its filename ## Label xml file and show bound box according to its filename
if self.usingPascalVocFormat is True and \ if self.usingPascalVocFormat is True and \
self.defaultSaveDir is not None: self.defaultSaveDir is not None:
basename = os.path.basename(os.path.splitext(self.filename)[0]) basename = os.path.basename(os.path.splitext(self.filename)[0]) + '.xml'
xmlPath = os.path.join(self.defaultSaveDir, basename + '.xml') xmlPath = os.path.join(self.defaultSaveDir, basename)
self.loadPascalXMLByFilename(xmlPath) self.loadPascalXMLByFilename(xmlPath)
return True return True
@ -1069,13 +1068,13 @@ class MainWindow(QMainWindow, WindowMixin):
else: else:
self.labelHist.append(line) self.labelHist.append(line)
def loadPascalXMLByFilename(self, filename): def loadPascalXMLByFilename(self, xmlPath):
if self.filename is None: if self.filename is None:
return return
if os.path.exists(filename) is False: if os.path.isfile(xmlPath) is False:
return return
tVocParseReader = PascalVocReader(filename) tVocParseReader = PascalVocReader(xmlPath)
shapes = tVocParseReader.getShapes() shapes = tVocParseReader.getShapes()
self.loadLabels(shapes) self.loadLabels(shapes)

View File

@ -1,10 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-
import _init_path
import sys import sys
from xml.etree import ElementTree from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement from xml.etree.ElementTree import Element, SubElement
from xml.dom import minidom
from lxml import etree from lxml import etree
class PascalVocWriter: class PascalVocWriter:
def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None): def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
self.foldername = foldername self.foldername = foldername
self.filename = filename self.filename = filename
@ -17,9 +21,9 @@ class PascalVocWriter:
""" """
Return a pretty-printed XML string for the Element. Return a pretty-printed XML string for the Element.
""" """
rough_string = ElementTree.tostring(elem,'utf8') rough_string = ElementTree.tostring(elem, 'utf8')
reparsed = minidom.parseString(rough_string) root = etree.fromstring(rough_string)
return reparsed.toprettyxml(indent="\t") return etree.tostring(root, pretty_print=True)
def genXML(self): def genXML(self):
""" """
@ -30,46 +34,45 @@ class PascalVocWriter:
self.foldername is None or \ self.foldername is None or \
self.imgSize is None or \ self.imgSize is None or \
len(self.boxlist) <= 0: len(self.boxlist) <= 0:
return None return None
top = Element('annotation') top = Element('annotation')
folder = SubElement(top,'folder') folder = SubElement(top, 'folder')
folder.text = self.foldername folder.text = self.foldername
filename = SubElement(top,'filename') filename = SubElement(top, 'filename')
filename.text = self.filename filename.text = self.filename
localImgPath = SubElement(top,'path') localImgPath = SubElement(top, 'path')
localImgPath.text = self.localImgPath localImgPath.text = self.localImgPath
source = SubElement(top,'source') source = SubElement(top, 'source')
database = SubElement(source,'database') database = SubElement(source, 'database')
database.text = self.databaseSrc database.text = self.databaseSrc
size_part = SubElement(top,'size') size_part = SubElement(top, 'size')
width = SubElement(size_part,'width') width = SubElement(size_part, 'width')
height = SubElement(size_part,'height') height = SubElement(size_part, 'height')
depth = SubElement(size_part,'depth') depth = SubElement(size_part, 'depth')
width.text = str(self.imgSize[1]) width.text = str(self.imgSize[1])
height.text = str(self.imgSize[0]) height.text = str(self.imgSize[0])
if len(self.imgSize)==3: if len(self.imgSize) == 3:
depth.text = str(self.imgSize[2]) depth.text = str(self.imgSize[2])
else: else:
depth.text = '1' depth.text = '1'
segmented = SubElement(top,'segmented') segmented = SubElement(top, 'segmented')
segmented.text ='0' segmented.text = '0'
return top return top
def addBndBox(self, xmin, ymin, xmax, ymax, name): def addBndBox(self, xmin, ymin, xmax, ymax, name):
bndbox = {'xmin':xmin, 'ymin':ymin, 'xmax':xmax, 'ymax':ymax} bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
bndbox['name'] = name bndbox['name'] = name
self.boxlist.append(bndbox); self.boxlist.append(bndbox)
def appendObjects(self, top): def appendObjects(self, top):
for each_object in self.boxlist: for each_object in self.boxlist:
object_item = SubElement(top,'object') object_item = SubElement(top, 'object')
name = SubElement(object_item, 'name') name = SubElement(object_item, 'name')
name.text = str(each_object['name']) name.text = str(each_object['name'])
pose = SubElement(object_item, 'pose') pose = SubElement(object_item, 'pose')
@ -88,25 +91,26 @@ class PascalVocWriter:
ymax = SubElement(bndbox, 'ymax') ymax = SubElement(bndbox, 'ymax')
ymax.text = str(each_object['ymax']) ymax.text = str(each_object['ymax'])
def save(self, targetFile = None): def save(self, targetFile=None):
root = self.genXML() root = self.genXML()
self.appendObjects(root) self.appendObjects(root)
out_file = None out_file = None
if targetFile is None: if targetFile is None:
out_file = open(self.filename + '.xml','w') out_file = open(self.filename + '.xml', 'w')
else: else:
out_file = open(targetFile, 'w') out_file = open(targetFile, 'w')
out_file.write(self.prettify(root)) prettifyResult = self.prettify(root)
out_file.write(prettifyResult)
out_file.close() out_file.close()
class PascalVocReader: class PascalVocReader:
def __init__(self, filepath): def __init__(self, filepath):
## shapes type: # shapes type:
## [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color] # [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
self.shapes=[] self.shapes = []
self.filepath = filepath self.filepath = filepath
self.parseXML() self.parseXML()
@ -118,22 +122,23 @@ class PascalVocReader:
ymin = rect[1] ymin = rect[1]
xmax = rect[2] xmax = rect[2]
ymax = rect[3] ymax = rect[3]
points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)] points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
self.shapes.append((label, points, None, None)) self.shapes.append((label, points, None, None))
def parseXML(self): def parseXML(self):
assert self.filepath.endswith('.xml'), "Unsupport file format" assert self.filepath.endswith('.xml'), "Unsupport file format"
xmltree = ElementTree.parse(self.filepath).getroot() parser = etree.XMLParser(encoding='utf-8')
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
filename = xmltree.find('filename').text filename = xmltree.find('filename').text
for object_iter in xmltree.findall('object'): for object_iter in xmltree.findall('object'):
rects = [] rects = []
bndbox = object_iter.find("bndbox") bndbox = object_iter.find("bndbox")
rects.append([int(it.text) for it in bndbox]) rects.append([int(it.text) for it in bndbox])
label = object_iter.find('name').text label = object_iter.find('name').text
for rect in rects: for rect in rects:
self.addShape(label, rect) self.addShape(label, rect)
return True return True
@ -146,4 +151,3 @@ tmp.addBndBox(10,10,20,30,'chair')
tmp.addBndBox(1,1,600,600,'car') tmp.addBndBox(1,1,600,600,'car')
tmp.save() tmp.save()
""" """

BIN
tests/test.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

BIN
tests/臉書.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 B