Support Unicode for saving and loading Pascal XML format

Use lxml instead of minidom to pretty-print the XML, because minidom does not support Unicode
Save the XML with Unicode content
Add a tests directory to collect more images for future unit tests
This commit is contained in:
tzutalin 2016-12-02 11:23:40 +08:00
parent 930d111eb8
commit 50a079edc4
4 changed files with 47 additions and 44 deletions

View File

@ -538,7 +538,6 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected)
print 'shapeSelectionChanged'
def addLabel(self, shape):
item = QListWidgetItem(shape.label)
@ -736,8 +735,8 @@ class MainWindow(QMainWindow, WindowMixin):
## Label xml file and show bound box according to its filename
if self.usingPascalVocFormat is True and \
self.defaultSaveDir is not None:
basename = os.path.basename(os.path.splitext(self.filename)[0])
xmlPath = os.path.join(self.defaultSaveDir, basename + '.xml')
basename = os.path.basename(os.path.splitext(self.filename)[0]) + '.xml'
xmlPath = os.path.join(self.defaultSaveDir, basename)
self.loadPascalXMLByFilename(xmlPath)
return True
@ -1069,13 +1068,13 @@ class MainWindow(QMainWindow, WindowMixin):
else:
self.labelHist.append(line)
def loadPascalXMLByFilename(self, filename):
def loadPascalXMLByFilename(self, xmlPath):
if self.filename is None:
return
if os.path.exists(filename) is False:
if os.path.isfile(xmlPath) is False:
return
tVocParseReader = PascalVocReader(filename)
tVocParseReader = PascalVocReader(xmlPath)
shapes = tVocParseReader.getShapes()
self.loadLabels(shapes)

View File

@ -1,10 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf8 -*-
import _init_path
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from xml.dom import minidom
from lxml import etree
class PascalVocWriter:
def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
self.foldername = foldername
self.filename = filename
@ -18,8 +22,8 @@ class PascalVocWriter:
Return a pretty-printed XML string for the Element.
"""
rough_string = ElementTree.tostring(elem, 'utf8')
reparsed = minidom.parseString(rough_string)
return reparsed.toprettyxml(indent="\t")
root = etree.fromstring(rough_string)
return etree.tostring(root, pretty_print=True)
def genXML(self):
"""
@ -59,13 +63,12 @@ class PascalVocWriter:
segmented = SubElement(top, 'segmented')
segmented.text = '0'
return top
def addBndBox(self, xmin, ymin, xmax, ymax, name):
bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
bndbox['name'] = name
self.boxlist.append(bndbox);
self.boxlist.append(bndbox)
def appendObjects(self, top):
for each_object in self.boxlist:
@ -97,15 +100,16 @@ class PascalVocWriter:
else:
out_file = open(targetFile, 'w')
out_file.write(self.prettify(root))
prettifyResult = self.prettify(root)
out_file.write(prettifyResult)
out_file.close()
class PascalVocReader:
def __init__(self, filepath):
## shapes type:
## [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
# shapes type:
# [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color]
self.shapes = []
self.filepath = filepath
self.parseXML()
@ -123,7 +127,8 @@ class PascalVocReader:
def parseXML(self):
assert self.filepath.endswith('.xml'), "Unsupport file format"
xmltree = ElementTree.parse(self.filepath).getroot()
parser = etree.XMLParser(encoding='utf-8')
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
filename = xmltree.find('filename').text
for object_iter in xmltree.findall('object'):
@ -146,4 +151,3 @@ tmp.addBndBox(10,10,20,30,'chair')
tmp.addBndBox(1,1,600,600,'car')
tmp.save()
"""

BIN
tests/test.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

BIN
tests/臉書.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 B