Fix saving of UTF-8 characters: non-ASCII text was written to the XML file as numeric character references (e.g. "&#21704;" instead of "哈"), which is not human-readable

This commit is contained in:
ruifeng.shan 2017-04-21 15:20:26 +08:00
parent c3d1baac5f
commit 6dae9fdac7

View File

@ -2,8 +2,6 @@
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
import _init_path import _init_path
import sys import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree from lxml import etree
import codecs import codecs
@ -25,9 +23,10 @@ class PascalVocWriter:
""" """
Return a pretty-printed XML string for the Element. Return a pretty-printed XML string for the Element.
""" """
rough_string = ElementTree.tostring(elem, 'utf8') rough_string = etree.tostring(elem, encoding='UTF-8')
root = etree.fromstring(rough_string) rough_string = str(rough_string, encoding="UTF-8")
return etree.tostring(root, pretty_print=True) root = etree.XML(rough_string)
return etree.tostring(root, encoding='UTF-8', pretty_print=True)
def genXML(self): def genXML(self):
""" """
@ -39,26 +38,26 @@ class PascalVocWriter:
self.imgSize is None: self.imgSize is None:
return None return None
top = Element('annotation') top = etree.Element('annotation')
top.set('verified', 'yes' if self.verified else 'no') top.set('verified', 'yes' if self.verified else 'no')
folder = SubElement(top, 'folder') folder = etree.SubElement(top, 'folder')
folder.text = self.foldername folder.text = self.foldername
filename = SubElement(top, 'filename') filename = etree.SubElement(top, 'filename')
filename.text = self.filename filename.text = self.filename
localImgPath = SubElement(top, 'path') localImgPath = etree.SubElement(top, 'path')
localImgPath.text = self.localImgPath localImgPath.text = self.localImgPath
source = SubElement(top, 'source') source = etree.SubElement(top, 'source')
database = SubElement(source, 'database') database = etree.SubElement(source, 'database')
database.text = self.databaseSrc database.text = self.databaseSrc
size_part = SubElement(top, 'size') size_part = etree.SubElement(top, 'size')
width = SubElement(size_part, 'width') width = etree.SubElement(size_part, 'width')
height = SubElement(size_part, 'height') height = etree.SubElement(size_part, 'height')
depth = SubElement(size_part, 'depth') depth = etree.SubElement(size_part, 'depth')
width.text = str(self.imgSize[1]) width.text = str(self.imgSize[1])
height.text = str(self.imgSize[0]) height.text = str(self.imgSize[0])
if len(self.imgSize) == 3: if len(self.imgSize) == 3:
@ -66,7 +65,7 @@ class PascalVocWriter:
else: else:
depth.text = '1' depth.text = '1'
segmented = SubElement(top, 'segmented') segmented = etree.SubElement(top, 'segmented')
segmented.text = '0' segmented.text = '0'
return top return top
@ -77,27 +76,27 @@ class PascalVocWriter:
def appendObjects(self, top): def appendObjects(self, top):
for each_object in self.boxlist: for each_object in self.boxlist:
object_item = SubElement(top, 'object') object_item = etree.SubElement(top, 'object')
name = SubElement(object_item, 'name') name = etree.SubElement(object_item, 'name')
try: try:
name.text = unicode(each_object['name']) name.text = unicode(each_object['name'])
except NameError: except NameError:
# Py3: NameError: name 'unicode' is not defined # Py3: NameError: name 'unicode' is not defined
name.text = each_object['name'] name.text = each_object['name']
pose = SubElement(object_item, 'pose') pose = etree.SubElement(object_item, 'pose')
pose.text = "Unspecified" pose.text = "Unspecified"
truncated = SubElement(object_item, 'truncated') truncated = etree.SubElement(object_item, 'truncated')
truncated.text = "0" truncated.text = "0"
difficult = SubElement(object_item, 'difficult') difficult = etree.SubElement(object_item, 'difficult')
difficult.text = "0" difficult.text = "0"
bndbox = SubElement(object_item, 'bndbox') bndbox = etree.SubElement(object_item, 'bndbox')
xmin = SubElement(bndbox, 'xmin') xmin = etree.SubElement(bndbox, 'xmin')
xmin.text = str(each_object['xmin']) xmin.text = str(each_object['xmin'])
ymin = SubElement(bndbox, 'ymin') ymin = etree.SubElement(bndbox, 'ymin')
ymin.text = str(each_object['ymin']) ymin.text = str(each_object['ymin'])
xmax = SubElement(bndbox, 'xmax') xmax = etree.SubElement(bndbox, 'xmax')
xmax.text = str(each_object['xmax']) xmax.text = str(each_object['xmax'])
ymax = SubElement(bndbox, 'ymax') ymax = etree.SubElement(bndbox, 'ymax')
ymax.text = str(each_object['ymax']) ymax.text = str(each_object['ymax'])
def save(self, targetFile=None): def save(self, targetFile=None):
@ -138,8 +137,14 @@ class PascalVocReader:
def parseXML(self): def parseXML(self):
assert self.filepath.endswith('.xml'), "Unsupport file format" assert self.filepath.endswith('.xml'), "Unsupport file format"
parser = etree.XMLParser(encoding='utf-8') content = None
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot() with open(self.filepath, 'r') as xmlFile:
content = xmlFile.read()
if content is None:
return False
xmltree = etree.XML(content)
filename = xmltree.find('filename').text filename = xmltree.find('filename').text
try: try:
verified = xmltree.attrib['verified'] verified = xmltree.attrib['verified']