Fix saving of UTF-8 characters: the XML file content was written as numeric character references such as "&#21704;" (for "哈"), which is not human-readable

This commit is contained in:
ruifeng.shan 2017-04-21 15:20:26 +08:00
parent c3d1baac5f
commit 6dae9fdac7

View File

@ -2,8 +2,6 @@
# -*- coding: utf8 -*-
import _init_path
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
@ -25,9 +23,10 @@ class PascalVocWriter:
"""
Return a pretty-printed XML string for the Element.
"""
rough_string = ElementTree.tostring(elem, 'utf8')
root = etree.fromstring(rough_string)
return etree.tostring(root, pretty_print=True)
rough_string = etree.tostring(elem, encoding='UTF-8')
rough_string = str(rough_string, encoding="UTF-8")
root = etree.XML(rough_string)
return etree.tostring(root, encoding='UTF-8', pretty_print=True)
def genXML(self):
"""
@ -39,26 +38,26 @@ class PascalVocWriter:
self.imgSize is None:
return None
top = Element('annotation')
top = etree.Element('annotation')
top.set('verified', 'yes' if self.verified else 'no')
folder = SubElement(top, 'folder')
folder = etree.SubElement(top, 'folder')
folder.text = self.foldername
filename = SubElement(top, 'filename')
filename = etree.SubElement(top, 'filename')
filename.text = self.filename
localImgPath = SubElement(top, 'path')
localImgPath = etree.SubElement(top, 'path')
localImgPath.text = self.localImgPath
source = SubElement(top, 'source')
database = SubElement(source, 'database')
source = etree.SubElement(top, 'source')
database = etree.SubElement(source, 'database')
database.text = self.databaseSrc
size_part = SubElement(top, 'size')
width = SubElement(size_part, 'width')
height = SubElement(size_part, 'height')
depth = SubElement(size_part, 'depth')
size_part = etree.SubElement(top, 'size')
width = etree.SubElement(size_part, 'width')
height = etree.SubElement(size_part, 'height')
depth = etree.SubElement(size_part, 'depth')
width.text = str(self.imgSize[1])
height.text = str(self.imgSize[0])
if len(self.imgSize) == 3:
@ -66,7 +65,7 @@ class PascalVocWriter:
else:
depth.text = '1'
segmented = SubElement(top, 'segmented')
segmented = etree.SubElement(top, 'segmented')
segmented.text = '0'
return top
@ -77,27 +76,27 @@ class PascalVocWriter:
def appendObjects(self, top):
for each_object in self.boxlist:
object_item = SubElement(top, 'object')
name = SubElement(object_item, 'name')
object_item = etree.SubElement(top, 'object')
name = etree.SubElement(object_item, 'name')
try:
name.text = unicode(each_object['name'])
except NameError:
# Py3: NameError: name 'unicode' is not defined
name.text = each_object['name']
pose = SubElement(object_item, 'pose')
pose = etree.SubElement(object_item, 'pose')
pose.text = "Unspecified"
truncated = SubElement(object_item, 'truncated')
truncated = etree.SubElement(object_item, 'truncated')
truncated.text = "0"
difficult = SubElement(object_item, 'difficult')
difficult = etree.SubElement(object_item, 'difficult')
difficult.text = "0"
bndbox = SubElement(object_item, 'bndbox')
xmin = SubElement(bndbox, 'xmin')
bndbox = etree.SubElement(object_item, 'bndbox')
xmin = etree.SubElement(bndbox, 'xmin')
xmin.text = str(each_object['xmin'])
ymin = SubElement(bndbox, 'ymin')
ymin = etree.SubElement(bndbox, 'ymin')
ymin.text = str(each_object['ymin'])
xmax = SubElement(bndbox, 'xmax')
xmax = etree.SubElement(bndbox, 'xmax')
xmax.text = str(each_object['xmax'])
ymax = SubElement(bndbox, 'ymax')
ymax = etree.SubElement(bndbox, 'ymax')
ymax.text = str(each_object['ymax'])
def save(self, targetFile=None):
@ -138,8 +137,14 @@ class PascalVocReader:
def parseXML(self):
assert self.filepath.endswith('.xml'), "Unsupport file format"
parser = etree.XMLParser(encoding='utf-8')
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
content = None
with open(self.filepath, 'r') as xmlFile:
content = xmlFile.read()
if content is None:
return False
xmltree = etree.XML(content)
filename = xmltree.find('filename').text
try:
verified = xmltree.attrib['verified']