6 * Adapted from load-grammar-dom.cxx
7 * Written by Boris Kolpackov <boris@codesynthesis.com>
8 * Assigned, by the author, to the public domain
10 * This program uses Xerces-C++ DOM parser to load a set of schema files
11 * and then to validate a set of XML documents against these schemas. To
12 * build this program you will need Xerces-C++ 3.0.0 or later. For more
15 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
18 * Adaptation by Keith Marshall <keithmarshall@users.sourceforge.net>
19 * Copyright (C) 2013, 2019, MinGW.org Project
21 * This is free software. Permission is granted to copy, modify and
22 * redistribute this software, under the provisions of the GNU General
23 * Public License, Version 3, (or, at your option, any later version),
24 * as published by the Free Software Foundation; see the file COPYING
25 * for licensing details.
27 * Note, in particular, that this software is provided "as is", in the
28 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
29 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
30 * PARTICULAR PURPOSE. Under no circumstances will the author, or the
31 * MinGW Project, accept liability for any damages, however caused,
32 * arising from the use of this software.
#define __STDC_FORMAT_MACROS 1
#include <inttypes.h> /* for PRIu64 */

#include <cstdio> /* for fprintf() */
#include <cstring> /* for strlen() and strcpy() */
#include <strings.h> /* for strcasecmp() */

#include <memory> /* for std::auto_ptr */
#include <cstddef> /* for std::size_t */
#include <vector> /* for std::vector */

#include <libgen.h> /* for basename() */
#if __cplusplus >= 201103L
/* C++11 deprecates auto_ptr, in favour of shared_ptr or unique_ptr;
 * prefer the latter alternative, in this case.  The name is fully
 * qualified, so that it resolves without any "using" directive.
 */
# define unique_or_auto_ptr std::unique_ptr

#else
/* Using a pre-C++11 compiler; must still use auto_ptr
 */
# define unique_or_auto_ptr std::auto_ptr
#endif
56 #include <xercesc/util/XMLUni.hpp>
57 #include <xercesc/util/XMLString.hpp>
58 #include <xercesc/util/PlatformUtils.hpp>
60 #include <xercesc/dom/DOM.hpp>
62 #include <xercesc/validators/common/Grammar.hpp>
63 #include <xercesc/framework/XMLGrammarPoolImpl.hpp>
66 using namespace xercesc;
68 #if _XERCES_VERSION < 30000
69 /* We need at least Xerces-C++ version 3.0.0
71 # error Xerces-C++ version >= 3.0.0 is required!
73 #elif _XERCES_VERSION >= 30100
74 /* We may wish to exploit some features which were not introduced
75 * until Xerces-C++ version 3.1.0
77 # define IF_XERCES_30100_PLUS( STATEMENT ) STATEMENT
80 /* We cannot use Xerces-C++ version 3.1.0 features; make them no-op.
82 # define IF_XERCES_30100_PLUS( STATEMENT )
85 class error_handler: public DOMErrorHandler
87 /* A locally defined class for capture of fault conditions, as
88 * reported by our DOM parsers.
93 error_handler( const char *rel): source(rel),
94 first_report(true), new_document(true), failed(false){}
96 /* Method to access recorded error condition status.
98 bool has_failed() const { return failed; }
100 /* Method to reset recorded status, in preparation for
101 * parsing a new document.
103 void reset(){ new_document = true; failed = false; }
105 /* Method to handle error conditions, on behalf of our
108 virtual bool handleError( const xercesc::DOMError& );
111 /* The type of XML input being parsed, recorded when we
112 * consturct the error handler for binding to a particular
117 /* Recording for error condition status.
119 bool first_report, new_document, failed;
123 error_handler::handleError( const xercesc::DOMError& condition )
125 /* Implementation of the error handler, which we will use to capture
126 * status, and report abnormal conditions detected by our DOM parsers.
128 bool warn = condition.getSeverity() == DOMError::DOM_SEVERITY_WARNING;
130 /* Record detection of any condition which is more severe than
133 if( ! warn ) failed = true;
135 /* Identify the location, within the current XML schema or document
136 * file, where the abnormality has been detected.
138 DOMLocator* loc( condition.getLocation() );
140 /* When this is the first abnormality detected within the current
141 * XML schema or document file...
145 /* ...but we've previously reported abnormalities within another
146 * input file, then separate the current report from diagnostics
147 * relating to that other file...
149 if( ! first_report ) fputc( '\n', stderr );
151 /* ...then, regardless of whatever may have gone before, format
152 * and emit a report header to identify the current file.
154 char *uri = XMLString::transcode( loc->getURI() );
155 fprintf( stderr, "Problem Report:\n%s: %s\n", source, uri );
156 XMLString::release( &uri );
158 /* Record that we've now emitted a report header and diagnostic
159 * for the current XML input file.
161 first_report = new_document = false;
164 /* Whether we added a new report header, or not, we still have a
165 * diagnostic message to emit.
167 char* msg = XMLString::transcode( condition.getMessage() );
168 fprintf( stderr, "%" PRIu64 ":%" PRIu64 ": %s: %s\n", loc->getLineNumber(),
169 loc->getColumnNumber(), warn ? "WARNING" : "ERROR", msg
171 XMLString::release( &msg );
173 /* Finally, we return "true" to tell the DOM parser that we've
174 * handled the error, and that it should continue parsing.
bool
insufficient_arguments( bool status, const char *program_pathname )
{
  /* Diagnostic routine to report a lack of any command arguments
   * to specify the XML documents which are to be validated.
   *
   *   status            true when too few arguments were given.
   *   program_pathname  the command path, normally argv[0]; it is
   *                     never modified, and may be null.
   *
   * Returns "status", unchanged, so that the call may be used
   * directly as the condition of an "if" statement.
   */
  if( status )
  {
    /* The "status" flag indicates an abnormal condition...
     *
     * We want to call "basename()" on the passed "program_pathname";
     * while this is likely safe, it MAY try to modify the input string,
     * so create a temporary working copy, (including its terminating
     * NUL); a std::vector is used in place of a variable length array,
     * which is not supported by ISO C++.
     */
    const char *pathname = program_pathname ? program_pathname : "<program>";
    std::vector<char> progname( pathname, pathname + strlen( pathname ) + 1 );

    /* ...then format and emit an appropriate diagnostic message; the
     * base name is required twice, but we need to compute it only once.
     */
    const char *name = basename( &progname[0] );
    fprintf( stderr, "%s: no XML documents specified for validation\n"
	"usage: %s [schema.xsd ...] document.xml ...\n", name, name
      );
  }
  /* Irrespective of condition, we echo back the input state.
   */
  return status;
}
209 create_parser( XMLGrammarPool* pool )
211 /* Helper function, to instantiate a DOM parser with "LS", (load and
212 * save), capability, (although we intend to use only "load").
214 const XMLCh ls_id[] = { chLatin_L, chLatin_S, chNull };
216 /* Locate a DOM implementation, providing the requisite "LS" feature.
218 DOMImplementation* impl(
219 DOMImplementationRegistry::getDOMImplementation( ls_id ) );
221 /* Instantiate a parser, based on this DOM implementation.
224 impl->createLSParser(
225 DOMImplementationLS::MODE_SYNCHRONOUS,
227 XMLPlatformUtils::fgMemoryManager,
230 /* Retrieve a pointer to its configuration data...
232 DOMConfiguration* conf( parser->getDomConfig() );
234 /* ...so we may apply this commonly useful configuration.
236 conf->setParameter( XMLUni::fgDOMComments, false );
237 conf->setParameter( XMLUni::fgDOMDatatypeNormalization, true );
238 conf->setParameter( XMLUni::fgDOMElementContentWhitespace, false );
239 conf->setParameter( XMLUni::fgDOMNamespaces, true );
240 conf->setParameter( XMLUni::fgDOMEntities, false );
242 /* Enable validation.
244 conf->setParameter( XMLUni::fgDOMValidate, true );
245 conf->setParameter( XMLUni::fgXercesSchema, true );
246 conf->setParameter( XMLUni::fgXercesSchemaFullChecking, false );
248 /* Use the loaded grammar during parsing.
250 conf->setParameter( XMLUni::fgXercesUseCachedGrammarInParse, true );
252 /* Don't load schemas from any other source (e.g., from XML document's
253 * xsi:schemaLocation attributes).
255 conf->setParameter( XMLUni::fgXercesLoadSchema, false );
257 /* Xerces-C++ 3.1.0 is the first version with working support for
260 IF_XERCES_30100_PLUS(
261 conf->setParameter( XMLUni::fgXercesHandleMultipleImports, true )
264 /* We will release the DOM document ourselves.
266 conf->setParameter( XMLUni::fgXercesUserAdoptsDOMDocument, true );
268 /* Return a pointer to the instantiated parser.
274 validation_status( int argc, char **argv )
278 /* Initialize a grammer pool, for use by our parser instances.
280 MemoryManager* mm( XMLPlatformUtils::fgMemoryManager );
281 unique_or_auto_ptr<XMLGrammarPool> gp( new XMLGrammarPoolImpl( mm ) );
283 /* Load the schema definitions into the grammar pool.
287 /* Instantiate a parser for the schema definition file(s).
289 DOMLSParser* parser( create_parser( gp.get() ) );
291 /* Initialize an error handler for the schema context,
292 * and bind it to the schema file parser.
294 error_handler eh( "XML Schema" );
295 parser->getDomConfig()->setParameter( XMLUni::fgDOMErrorHandler, &eh );
297 /* Scan command arguments, left to right, to identify any XML schema
298 * files which we are expected to interpret.
300 do { const char *source = argv[argind]; size_t extent = strlen( source );
301 if( (extent > 4) && (strcasecmp( source + extent - 4, ".xsd" ) == 0) )
303 /* We have a "*.xsd" file to parse; do so, loading the grammar...
305 if( !parser->loadGrammar( source, Grammar::SchemaGrammarType, true ) )
307 /* ...but complain, and bail out, if loading fails...
309 fprintf( stderr, "%s: error: unable to load\n", source );
312 if( eh.has_failed() )
314 * ...or if any schema parsing error was encountered.
319 /* We've exhausted the "*.xsd" file references; break out of
320 * the scanning loop, without further ceremony.
324 /* Continue for the next "*.xsd" file, if any, provided there
325 * have been no schema abormalities detected thus far.
327 } while( (retcode == 0) && (++argind < argc) );
329 /* We're finished with our schema parser; release its resource pool.
334 /* Before proceeding to parse any XML documents, check that any
335 * specified XML schemas have been loaded successfully.
339 /* It's okay to proceed, but it would be pointless to do so...
341 if( insufficient_arguments( argind >= argc, *argv ) )
343 * ...when there are no remaining arguments to specify any
344 * XML documents for checking; in this case, bail out.
348 /* Lock the grammar pool. This is necessary if we plan to use the
349 * same grammar pool in multiple threads (this way we can reuse the
350 * same grammar in multiple parsers). Locking the pool disallows any
351 * modifications to the pool, such as an attempt by one of the threads
352 * to cache additional schemas.
356 /* Instantiate a new parser, to process the XML documents.
358 DOMLSParser* parser( create_parser( gp.get() ) );
360 /* Initialize an error handler for the XML document context,
361 * and bind it to the new parser.
363 error_handler eh( "XML Document" );
364 parser->getDomConfig()->setParameter( XMLUni::fgDOMErrorHandler, &eh );
366 /* Process all remaining arguments, as references to XML documents.
368 while( argind < argc )
370 /* Reset the error handler state, prior to loading each document.
373 DOMDocument* doc( parser->parseURI( argv[argind++] ) );
375 /* In this application, all we care about is that the document
376 * can be successfully read by our validating parser; if we did
377 * read it successfully, we have no further use for it, se we
378 * may simply set it aside.
380 if( doc ) doc->release();
382 /* If any error occurred, while parsing the current document,
383 * the error handler will have recorded it; we need to capture
384 * that state here, for our eventual return code.
386 if( eh.has_failed() ) retcode = 1;
388 /* When all specified documents have been validated, we are done
389 * with our parser, so we may release its resource pool.
393 /* Report back, with the cumulative status from XML document parsing.
399 main( int argc, char **argv )
401 /* Fewer than one argument, after the command verb itself,
402 * is not useful; complain, and bail out.
404 if( insufficient_arguments( argc < 2, *argv ) )
407 /* We must initialize Xerces-C++, before we can use it.
409 XMLPlatformUtils::Initialize();
411 /* Determine the validation status for all specified XML documents,
412 * with respect to any specified XML schema definitions.
414 int retcode = validation_status( argc, argv );
416 /* Shut down the Xerces-C++ subsystem, before returning the resultant
417 * validation status code to the operating system.
419 XMLPlatformUtils::Terminate();
423 /* $RCSfile$: end of file */