Changeset 76
- Timestamp: 08/03/07 15:13:56 (13 months ago)
- Files: 1 modified
Xml/Validator/Backend/OmeValidator.py (modified) (14 diffs)
Legend:
- Unmodified
- Added
- Removed
Xml/Validator/Backend/OmeValidator.py
r75 r76 2 2 # encoding: utf-8 3 3 """ 4 O METiffValidator.py4 OmeValidator.py 5 5 6 6 Created by Andrew Patterson on 2007-07-24. … … 19 19 # Standard Imports 20 20 import logging 21 from xml.dom.minidom import getDOMImplementation 21 22 from StringIO import StringIO 22 23 from xml import sax 24 import os 25 from stat import * 23 26 24 27 # Try to load Image for XML Schema Vlaidation Support … … 138 141 self.warningList = list() 139 142 self.unresolvedList = list() 143 144 # build the blank dom 145 self.theDom = None 140 146 141 147 def __str__(self): … … 174 180 return out 175 181 176 def parse(self, in Xml):182 def parse(self, inFile): 177 183 """ 178 184 Parse - Main work function - Validates the XML, sets the flags and 179 185 populates the error and warning logs 180 186 """ 181 # check there is some xml data 182 if len(inXml) == 0: 183 self.errorList.append(ParseMessage(None, None, None, "NoData", None, "No Xml data found")) 184 return 185 186 ## mark xlm as having been parsed 187 # mark xlm as having been parsed 187 188 self.hasParsedXml = True 188 189 189 190 # look at file for Ids, Refs, and namespaces 190 self.scanForIdsAndNamespace(in Xml)191 self.scanForIdsAndNamespace(inFile) 191 192 192 193 # check the xml is valid aginst it's schema 193 self.validateAgainstSchema( inXml)194 195 def validateAgainstSchema(self , inXml):194 self.validateAgainstSchema() 195 196 def validateAgainstSchema(self): 196 197 if not haveXsdSupport: 197 198 self.errorList.append(ParseMessage(None, None, None, "XSD", None, " LXML support not available - no validation")) … … 201 202 schema = self.loadChoosenSchema() 202 203 # create an IO string for the xml string provided 203 stringXml = StringIO( inXml)204 stringXml = StringIO(self.theDom.toxml()) 204 205 # building the document tree from the input xml 205 206 try: … … 242 243 return schema 243 244 244 def scanForIdsAndNamespace(self, in Xml):245 def scanForIdsAndNamespace(self, inFile): 245 246 ''' 246 247 Look through the Xml 
stream for the namespace and store all the ID tags 247 248 This version looks at all the elements 248 249 ''' 249 # from xml.sax.handler import feature_namespaces 250 # # Create a parser 251 # parser = make_parser() 252 # 253 # # Enable namespace processing 254 # parser.setFeature(feature_namespaces, 1) 255 250 256 251 # locate the handler class to the parser 257 252 handlerContent = ElementAggregator() … … 260 255 # the xml in turn 261 256 try: 262 self.myParseString(inXml, handlerContent, handlerError, True) 263 #sax.parseString(inXml, handlerContent, handlerError) 257 sax.parse(inFile, handlerContent, handlerError) 264 258 except sax.SAXParseException: 265 259 self.errorList.append(ParseMessage(None, None, None, "XmlError",None, "Parsing of XML failed")) … … 274 268 # store the namespace 275 269 self.theNamespace = handlerContent.theNamespace 276 277 print handlerContent.shortFormXml 278 279 def myParseString(self, inXml, inHandlerContent, inHandlerError, inUseNamespace): 280 parser = sax.make_parser() 281 282 if inUseNamespace: 283 #inHandlerContent.feature_namespace_prefixes = True 284 parser.setFeature(sax.handler.feature_namespaces, True) 285 286 parser.setContentHandler(inHandlerContent) 287 parser.setErrorHandler(inHandlerError) 288 289 inpsrc = sax.xmlreader.InputSource() 290 inpsrc.setByteStream(StringIO(inXml)) 291 parser.parse(inpsrc) 270 # store the dom 271 self.theDom = handlerContent.dom 292 272 293 273 def scanForFirstOmeNamespace(self, inXml): … … 323 303 # Open the file 324 304 try: 325 # TODO - stop this loading entire file into memory326 305 theFile = open(inFilename, 'r') 327 theXml = theFile.read() 306 # check the file contains some data 307 length = os.stat(inFilename)[ST_SIZE] 308 if length == 0: 309 theFileReport.errorList.append(ParseMessage(inFilename, None, None, "IOFile","", "XML file was of zero length")) 328 310 except IOError: 329 311 theFileReport.errorList.append(ParseMessage(inFilename, None, None, "IOFile","", "XML file could 
not be read")) 330 312 return theFileReport 331 313 332 # Look for OME xml element 333 334 # Check for the OME namespace is loaded 335 336 theFileReport.parse(theXml) 314 # parse the file into the report and validate it 315 theFileReport.parse(theFile) 337 316 theFileReport.isOmeTiff = False 317 # the report has now been populated 338 318 return theFileReport 339 319 validateFile = classmethod(validateFile) … … 344 324 """ 345 325 theTiffReport = klass() 326 # check there is tiff file support 346 327 if not haveTiffSupport: 347 328 theTiffReport.isOmeTiff = False 348 329 theTiffReport.errorList.append(ParseMessage(inFilename, None, None, "NoLibrary","", "No Tiff library found - file could not be read")) 349 330 else: 331 # load the tiff image 350 332 try: 351 333 image = Image.open(inFilename) … … 354 336 theTiffReport.errorList.append(ParseMessage(inFilename, None, None, "InvalidFile","", "Not recognised as Tiff format - file could not be read")) 355 337 else: 338 # check for the XML containing tag within the tiff 356 339 if 270 not in image.tag.keys(): 357 340 theTiffReport.isOmeTiff = False 358 341 theTiffReport.errorList.append(ParseMessage(inFilename, None, None, "InvalidTiff","", "Tiff file did not containg an ImageDescription Tag - no XML found")) 359 342 else: 360 xml = image.tag[270] 343 # read the xml from the tiff 344 theXml = image.tag[270] 361 345 theTiffReport.isOmeTiff = True 362 theTiffReport.parse(xml) 346 # create a file object to represent the xml string 347 theFileString = StringIO(theXml) 348 # parse the new string/file object into the report and validate it 349 theTiffReport.parse(theFileString) 363 350 return theTiffReport 364 351 validateTiff = classmethod(validateTiff) … … 391 378 # Used to process all Elements by sax parser 392 379 class ElementAggregator(sax.ContentHandler): 380 inBinData = False 381 393 382 def startDocument(self): 394 383 ''' … … 401 390 self.shortFormXml = "" 402 391 self.skipCount = 0 403 404 def startElementNS(self, 
name, qname, attribs): 392 393 # Setup the DOM chunk 394 impl = getDOMImplementation() 395 self.dom = impl.createDocument(None, None, None) 396 self.stack = list() 397 398 def startElement(self, name, attribs): 405 399 ''' 406 400 Examine each element in turn and harvest any useful information 407 401 ''' 408 (theElementNamespace, theElementName) = name409 410 402 # pull the namespace out of the OME element 411 if theElementName == "OME": 412 if theElementNamespace == None: 403 if name == "OME": 404 try: 405 self.theNamespace = attribs.getValue("xmlns") 406 except KeyError: 413 407 self.theNamespace = "" 414 return415 self.theNamespace = theElementNamespace416 408 417 409 # save the ID in any elements encountered 418 if theElementName[-3:] == "Ref":410 if name[-3:] == "Ref": 419 411 try: 420 412 # If a Ref element then save in the refrences 421 self.references.append(attribs.getValue( (None,"ID")))413 self.references.append(attribs.getValue("ID")) 422 414 except KeyError: 423 415 pass … … 425 417 try: 426 418 # If any other element thee save in the ids 427 self.ids.append(attribs.getValue( (None,"ID")))419 self.ids.append(attribs.getValue("ID")) 428 420 except KeyError: 429 421 pass 430 431 # mark start of a BinData element - assumes valid schema 432 if theElementName == "BinData": 433 self.inBinDataContent = True 434 self.skipCount = 0 422 423 if name[-7:] == "BinData": 424 self.inBinData = True 425 self.domify(name, attribs) 435 426 436 self.shortFormXml = self.shortFormXml + "<%s xmlns=\"%s\"" % (theElementName, theElementNamespace) 437 438 for ((theAttribNamespace, theAttribName), value) in attribs.items(): 439 self.shortFormXml = self.shortFormXml + ' ' + theAttribName + '="' + attribs.getValue((theAttribNamespace,theAttribName)) + '"' 440 self.shortFormXml = self.shortFormXml + '>' 441 for ((attr_ns, lname), value) in attribs.items(): 442 if attr_ns is not None: 443 attr_qname = attribs.getQNameByName((attr_ns, lname)) 444 else: 445 attr_qname = lname 446 
return 447 448 def endElementNS(self, name, qname): 449 ''' 450 Record information as element closes 451 ''' 452 (theElementNamespace, theElementName) = name 453 # mark end of a BinData element - assumes valid schema 454 if theElementName == "BinData": 455 self.inBinDataContent = False 456 self.shortFormXml = self.shortFormXml + "Skipped:" 457 self.shortFormXml = self.shortFormXml + str(self.skipCount) 458 self.shortFormXml = self.shortFormXml + '</' + theElementName + '>' 459 460 def characters(self, ch): 461 ''' 462 Copy each character in turn or count if skipped 463 ''' 464 if self.inBinDataContent: 465 self.skipCount = self.skipCount + len(ch) 427 def endElement(self, name): 428 newElement = self.stack.pop() 429 length = len(self.stack) 430 if length == 0: 431 self.dom.appendChild(newElement) 466 432 else: 467 self.shortFormXml = self.shortFormXml + ch 468 433 self.stack[-1].appendChild(newElement) 434 self.clear() 435 436 def domify(self, name, attribs): 437 newElement = self.dom.createElement(name) 438 for (attr, value) in attribs.items(): 439 newAttribute = self.dom.createAttribute(attr) 440 newAttribute.value = value 441 newElement.setAttributeNode(newAttribute) 442 self.stack.append(newElement) 443 444 def characters(self, content): 445 if not self.inBinData: 446 # Strip trailing and/or leading whitespace, "\n", "\r", etc. 447 content = content.strip().strip('\n\r') 448 if len(content) > 0 and len(self.stack) > 0: 449 textNode = self.dom.createTextNode(content) 450 self.stack[-1].appendChild(textNode) 451 452 def clear(self): 453 self.inBinData = False 469 454 470 455 # Used to process the Elements until a namespace is found by sax parser
