greenhouse/libs/pascal_voc_io.py

172 lines
6.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding: utf8 -*-
2015-09-17 15:00:52 +08:00
import sys
2017-04-25 09:39:15 +02:00
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
2015-09-17 15:00:52 +08:00
from lxml import etree
import codecs
from libs.constants import DEFAULT_ENCODING
from libs.ustr import ustr
XML_EXT = '.xml'
ENCODE_METHOD = DEFAULT_ENCODING
2015-09-17 15:00:52 +08:00
class PascalVocWriter:
    """Collect bounding boxes for one image and write them as Pascal VOC XML.

    Typical use: construct the writer, call ``addBndBox`` once per object,
    optionally set ``verified``, then call ``save``.
    """

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        """Store the image metadata that ends up in the annotation header.

        foldername   -- text of the <folder> element.
        filename     -- text of the <filename> element; also the default
                        output name (with XML_EXT appended) used by save().
        imgSize      -- sequence indexed as [0] -> <height>, [1] -> <width>
                        and, when it has exactly three items, [2] -> <depth>.
        databaseSrc  -- text of <source>/<database>.
        localImgPath -- optional text of the <path> element.
        """
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize
        # One dict per box, in insertion order; filled by addBndBox().
        self.boxlist = []
        self.localImgPath = localImgPath
        # When True, genXML() marks the root element with verified="yes".
        self.verified = False

    @staticmethod
    def _tabify_indent(pretty_bytes):
        """Convert leading two-space indentation steps to tabs, line by line.

        Only the run of spaces at the start of each line is touched, so
        spaces inside element text or between attributes are preserved
        (the previous blanket replace rewrote those as well).  An odd
        leftover space is kept as-is.
        NOTE(review): assumes the serializer indents with two spaces per
        level (lxml's pretty_print default) and an ASCII-compatible encoding.
        """
        converted = []
        for line in pretty_bytes.split(b'\n'):
            content = line.lstrip(b' ')
            indent_width = len(line) - len(content)
            converted.append(
                b'\t' * (indent_width // 2) + b' ' * (indent_width % 2) + content)
        return b'\n'.join(converted)

    def prettify(self, elem):
        """
            Return a pretty-printed, tab-indented XML byte string for the Element.
        """
        # Round-trip through lxml for pretty printing; the original author
        # noted that minidom was not used because of UTF-8 problems.
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        pretty = etree.tostring(root, pretty_print=True, encoding=ENCODE_METHOD)
        return self._tabify_indent(pretty)

    def genXML(self):
        """
            Return the XML root element holding the header (no objects yet),
            or None when filename, foldername or imgSize is missing.
        """
        # Check conditions
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None

        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')

        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        # Anything other than a 3-item size is written with depth 1.
        if len(self.imgSize) == 3:
            depth.text = str(self.imgSize[2])
        else:
            depth.text = '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        """Queue one labelled box; it is serialised later by appendObjects()."""
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        bndbox['difficult'] = difficult
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one <object> element per queued box to the element `top`."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            name.text = ustr(each_object['name'])
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"

            # A box is flagged truncated when an edge sits on the image
            # border: max == height/width, or min == 1.
            truncated = SubElement(object_item, 'truncated')
            touches_border = (
                int(each_object['ymax']) == int(self.imgSize[0])
                or int(each_object['ymin']) == 1
                or int(each_object['xmax']) == int(self.imgSize[1])
                or int(each_object['xmin']) == 1
            )
            truncated.text = "1" if touches_border else "0"

            # Any truthy `difficult` value is written as "1", otherwise "0".
            difficult = SubElement(object_item, 'difficult')
            difficult.text = str(int(bool(each_object['difficult'])))

            bndbox = SubElement(object_item, 'bndbox')
            xmin = SubElement(bndbox, 'xmin')
            xmin.text = str(each_object['xmin'])
            ymin = SubElement(bndbox, 'ymin')
            ymin.text = str(each_object['ymin'])
            xmax = SubElement(bndbox, 'xmax')
            xmax.text = str(each_object['xmax'])
            ymax = SubElement(bndbox, 'ymax')
            ymax.text = str(each_object['ymax'])

    def save(self, targetFile=None):
        """Write the annotation to `targetFile`, or to filename + XML_EXT.

        Raises ValueError when genXML() cannot build the header because
        foldername, filename or imgSize is None.
        """
        root = self.genXML()
        if root is None:
            raise ValueError(
                "cannot save annotation: foldername, filename and imgSize must all be set")
        self.appendObjects(root)

        # Build the complete document before opening the file, so that a
        # serialisation error cannot leave a truncated annotation behind.
        xml_bytes = self.prettify(root)

        if targetFile is None:
            target = self.filename + XML_EXT
        else:
            target = targetFile
        with codecs.open(target, 'w', encoding=ENCODE_METHOD) as out_file:
            # Decode with the same encoding prettify() produced.
            out_file.write(xml_bytes.decode(ENCODE_METHOD))
2015-12-09 21:29:26 +08:00
class PascalVocReader:
    """Load the labelled boxes and the `verified` flag from a Pascal VOC XML file.

    Parsing happens in the constructor and is best-effort: if the file
    cannot be parsed, the reader keeps whatever shapes were collected
    before the error (possibly none) and does not raise.
    """

    def __init__(self, filepath):
        """Parse `filepath` immediately; parse errors are swallowed."""
        # shapes type:
        # [label, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color, difficult]
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        try:
            self.parseXML()
        except Exception:
            # Deliberately best-effort: an unreadable annotation yields an
            # empty/partial reader.  Only Exception is caught, so
            # KeyboardInterrupt and SystemExit still propagate.
            pass

    def getShapes(self):
        """Return the list of shape tuples collected so far."""
        return self.shapes

    def addShape(self, label, bndbox, difficult):
        """Append one shape built from a <bndbox> element.

        The four corner points are listed starting at (xmin, ymin) and
        continuing through (xmax, ymin), (xmax, ymax) and (xmin, ymax).
        Coordinates go through float() first, so values written as
        "12.0" are accepted and truncated towards zero.
        """
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
        # The two None slots are the colour fields of the shape tuple.
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self):
        """Parse self.filepath and fill self.shapes and self.verified.

        Returns True on success.  Raises AssertionError when the path
        does not end with XML_EXT; other parse errors propagate to the
        caller (the constructor swallows them).
        """
        # Explicit raise instead of `assert` so the check survives
        # `python -O`; the exception type and message are unchanged.
        if not self.filepath.endswith(XML_EXT):
            raise AssertionError("Unsupport file format")

        parser = etree.XMLParser(encoding=ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()

        # Only the exact value "yes" marks the annotation as verified.
        self.verified = xmltree.get('verified') == 'yes'

        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            # <difficult> is optional; a missing element means "not difficult".
            difficult = False
            difficult_elem = object_iter.find('difficult')
            if difficult_elem is not None:
                difficult = bool(int(difficult_elem.text))
            self.addShape(label, bndbox, difficult)
        return True