summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2021-02-25 11:08:35 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2021-02-25 11:08:35 +0300
commit08015a5208b1eb8273bf4ca9dd8779188fdb8a82 (patch)
tree6e7a8dd97dbf55bf7b0e1773fc60d08fb2578e8b
parentd31a6b344973db0c96e77b923753d8b1fa8d7767 (diff)
Release version 3.2.3+2v3.2.3+2
Use caret version constraint for depends manifest values; Add config.libxerces_c.network project configuration variable; Fix use-after-free error (CVE-2018-1311); Add symlink attribute for symbolic links that refer to upstream subdirectories; Fix MinGW GCC's 'redeclared without dllimport attribute' warnings
-rw-r--r--README-DEV8
-rw-r--r--TODO2
-rw-r--r--libxerces-c/INSTALL7
-rw-r--r--libxerces-c/README-DEV74
-rw-r--r--libxerces-c/build/root.build7
-rw-r--r--libxerces-c/manifest16
-rw-r--r--libxerces-c/tests/build/root.build4
-rw-r--r--libxerces-c/tests/buildfile4
-rw-r--r--libxerces-c/tests/dom-print/.gitattributes1
-rw-r--r--libxerces-c/tests/net-accessor/testscript3
-rw-r--r--libxerces-c/tests/psvi-writer/.gitattributes1
-rw-r--r--libxerces-c/tests/sax-print/.gitattributes1
-rw-r--r--libxerces-c/tests/sax2-print/.gitattributes1
-rw-r--r--libxerces-c/xercesc/.gitattributes7
-rw-r--r--libxerces-c/xercesc/buildfile28
-rw-r--r--libxerces-c/xercesc/config.h6
-rw-r--r--libxerces-c/xercesc/dtd-decl-use-after-free.patch20
-rw-r--r--libxerces-c/xercesc/inline-funcs-def-usage-order.patch38
l---------libxerces-c/xercesc/internal1
l---------libxerces-c/xercesc/internal/BinFileOutputStream.cpp1
l---------libxerces-c/xercesc/internal/BinFileOutputStream.hpp1
l---------libxerces-c/xercesc/internal/BinMemOutputStream.cpp1
l---------libxerces-c/xercesc/internal/BinMemOutputStream.hpp1
l---------libxerces-c/xercesc/internal/CharTypeTables.hpp1
l---------libxerces-c/xercesc/internal/DGXMLScanner.cpp1
l---------libxerces-c/xercesc/internal/DGXMLScanner.hpp1
l---------libxerces-c/xercesc/internal/ElemStack.cpp1
l---------libxerces-c/xercesc/internal/ElemStack.hpp1
l---------libxerces-c/xercesc/internal/EndOfEntityException.hpp1
l---------libxerces-c/xercesc/internal/IANAEncodings.hpp1
-rw-r--r--libxerces-c/xercesc/internal/IGXMLScanner.cpp3269
l---------libxerces-c/xercesc/internal/IGXMLScanner.hpp1
l---------libxerces-c/xercesc/internal/IGXMLScanner2.cpp1
l---------libxerces-c/xercesc/internal/MemoryManagerImpl.cpp1
l---------libxerces-c/xercesc/internal/MemoryManagerImpl.hpp1
l---------libxerces-c/xercesc/internal/ReaderMgr.cpp1
l---------libxerces-c/xercesc/internal/ReaderMgr.hpp1
l---------libxerces-c/xercesc/internal/SGXMLScanner.cpp1
l---------libxerces-c/xercesc/internal/SGXMLScanner.hpp1
l---------libxerces-c/xercesc/internal/ValidationContextImpl.cpp1
l---------libxerces-c/xercesc/internal/ValidationContextImpl.hpp1
l---------libxerces-c/xercesc/internal/VecAttrListImpl.cpp1
l---------libxerces-c/xercesc/internal/VecAttrListImpl.hpp1
l---------libxerces-c/xercesc/internal/VecAttributesImpl.cpp1
l---------libxerces-c/xercesc/internal/VecAttributesImpl.hpp1
l---------libxerces-c/xercesc/internal/WFXMLScanner.cpp1
l---------libxerces-c/xercesc/internal/WFXMLScanner.hpp1
l---------libxerces-c/xercesc/internal/XMLInternalErrorHandler.hpp1
l---------libxerces-c/xercesc/internal/XMLReader.cpp1
l---------libxerces-c/xercesc/internal/XMLReader.hpp1
l---------libxerces-c/xercesc/internal/XMLScanner.cpp1
l---------libxerces-c/xercesc/internal/XMLScanner.hpp1
l---------libxerces-c/xercesc/internal/XMLScannerResolver.cpp1
l---------libxerces-c/xercesc/internal/XMLScannerResolver.hpp1
l---------libxerces-c/xercesc/internal/XProtoType.cpp1
l---------libxerces-c/xercesc/internal/XProtoType.hpp1
l---------libxerces-c/xercesc/internal/XSAXMLScanner.cpp1
l---------libxerces-c/xercesc/internal/XSAXMLScanner.hpp1
l---------libxerces-c/xercesc/internal/XSObjectFactory.cpp1
l---------libxerces-c/xercesc/internal/XSObjectFactory.hpp1
l---------libxerces-c/xercesc/internal/XSerializable.hpp1
l---------libxerces-c/xercesc/internal/XSerializationException.hpp1
l---------libxerces-c/xercesc/internal/XSerializeEngine.cpp1
l---------libxerces-c/xercesc/internal/XSerializeEngine.hpp1
l---------libxerces-c/xercesc/internal/XTemplateSerializer.cpp1
l---------libxerces-c/xercesc/internal/XTemplateSerializer.hpp1
-rw-r--r--libxerces-c/xercesc/util/.gitattributes2
-rw-r--r--[l---------]libxerces-c/xercesc/util/KVStringPair.hpp224
68 files changed, 3720 insertions, 50 deletions
diff --git a/README-DEV b/README-DEV
index dbe777f..efb7421 100644
--- a/README-DEV
+++ b/README-DEV
@@ -87,6 +87,11 @@ build2, we end up with the following configuration options:
See the configuration options description at the "Build Instructions" page
(http://xerces.apache.org/xerces-c/build-3.html).
+We, however, disable the network support by default, replacing the
+--enable-netaccessor-curl option with --disable-network. The user will still
+be able to enable network by setting the config.libxerces_c.network project
+configuration variable to true.
+
Normally, when packaging a project, we need to replace some auto-generated
headers with our own implementations, deduce the compilation/linking options
and the source files to compile. For Xerces-C++ we can rely on the
@@ -110,8 +115,7 @@ $ make V=1 >>build.log 2>&1
Note that on Windows, to reduce complexity, we may build the upstream package
with the native network accessor and transcoder (rather than with the cURL-
-based network accessor and ICU-based transcoder) as it doesn't really affect
-the compilation and linking options.
+based network accessor and ICU-based transcoder).
For MSVC:
diff --git a/TODO b/TODO
index 447c5c4..da80960 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,5 @@
On the revision:
-- Remove the redundant spaces from "windows = ($tclass == 'windows')".
-
On the release:
- Use legal{} target type for legal documentation (LICENSE, AUTHORS, etc).
diff --git a/libxerces-c/INSTALL b/libxerces-c/INSTALL
index bfa5487..022fbf8 100644
--- a/libxerces-c/INSTALL
+++ b/libxerces-c/INSTALL
@@ -5,3 +5,10 @@ $ bpkg build libxerces-c
But if you don't want to use the package manager, then you can also build this
package manually using the standard build2 build system.
+
+Note that by default libxerces-c is built without network support. To enable
+it, run the following command instead:
+
+@@ Revise when the package conditional dependencies are supported.
+
+$ bpkg build libxerces-c +{ config.libxerces_c.network=true } libcurl
diff --git a/libxerces-c/README-DEV b/libxerces-c/README-DEV
index 78f4179..36386ed 100644
--- a/libxerces-c/README-DEV
+++ b/libxerces-c/README-DEV
@@ -11,14 +11,20 @@ validators,xinclude} xercesc/
$ ln -s ../../upstream/src/{stricmp,strnicmp}.{h,c} xercesc/
-$ pushd xercesc/util/
+$ mkdir xercesc/internal/ xercesc/util/
+
+$ pushd xercesc/internal/
+$ ln -s ../../../upstream/src/xercesc/internal/*.{cpp,hpp} ./
+
+$ cd ../util/
$ ln -s ../../../upstream/src/xercesc/util/*.{cpp,hpp,c} ./
$ ln -s ../../../upstream/src/xercesc/util/{regx,FileManagers} ./
-Note that the main reason for such a granular linking (we could just link
-upstream's Transcoders/, etc) is reducing the number of preprocessor macros we
-need to deduce in xercesc/config.h (see the change tracking instructions
-below for details). As a bonus it also simplifies the buildfile.
+Note that the main reasons for such granular linking (we could just link
+upstream's internal/, util/, etc) are source code patching and reducing the
+number of preprocessor macros we need to deduce in xercesc/config.h (see the
+change tracking instructions below for details). As a bonus it also simplifies
+the buildfile.
$ mkdir -p Transcoders NetAccessors MsgLoaders MutexManagers
$ ln -s ../../../../upstream/src/xercesc/util/Transcoders/ICU Transcoders/
@@ -30,18 +36,6 @@ $ ln -s ../../../upstream/src/xercesc/util/XercesVersion.hpp.cmake.in \
XercesVersion.hpp.in
$ popd
-We also patch the explicit template instantiation declarations and definitions
-(see xercesc/util/Xerces_autoconf_config.hpp for details):
-
-$ cp --remove-destination ../upstream/src/xercesc/util/{Janitor.hpp,JanitorExports.cpp} \
- xercesc/util/
-
-$ git apply xercesc/export-template-instantiations.patch
-
-Note that the patch is produced by the following command:
-
-$ git diff >xercesc/export-template-instantiations.patch
-
Use some of the upstream's tests and examples for testing:
$ ln -s ../../../upstream/samples/src/DOMPrint tests/dom-print/
@@ -49,17 +43,49 @@ $ ln -s ../../../upstream/samples/src/SAXPrint tests/sax-print/
$ ln -s ../../../upstream/samples/src/SAX2Print tests/sax2-print/
$ ln -s ../../../upstream/samples/src/PSVIWriter tests/psvi-writer/
-We also patch the net accessor test, which by some reason exits with the zero
-status printing the diagnostics to stdout for some errors:
+We also apply the following patches:
+
+- Fix of the use-after-free error (CVE-2018-1311) triggered during the
+ scanning of external DTDs (see https://security-tracker.debian.org/tracker/CVE-2018-1311
+ for details).
+
+ There is no upstream fix and only suggested mitigations, at the time of this
+ writing (see https://issues.apache.org/jira/browse/XERCESC-2188 for
+ details). Thus, we mitigate the issue at the expense of a memory leak, as it
+ is done by Debian (https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=947431).
+
+ $ cp --remove-destination ../upstream/src/xercesc/internal/IGXMLScanner.cpp \
+ xercesc/internal/
+
+ $ git apply xercesc/dtd-decl-use-after-free.patch
+
+- The explicit template instantiation declarations and definitions patch (see
+ xercesc/util/Xerces_autoconf_config.hpp for details):
+
+ $ cp --remove-destination ../upstream/src/xercesc/util/{Janitor.hpp,JanitorExports.cpp} \
+ xercesc/util/
+
+ $ git apply xercesc/export-template-instantiations.patch
+
+- The inline functions definition/usage order change to prevent MinGW GCC
+ from complaining when compiling code that uses libxerces-c:
+
+ $ cp --remove-destination ../upstream/src/xercesc/util/KVStringPair.hpp \
+ xercesc/util/
+
+ $ git apply xercesc/inline-funcs-def-usage-order.patch
+
+- Patch of the net accessor test, which for some reason exits with zero
+ status (printing the diagnostics to stdout) for some errors:
-$ cp ../upstream/tests/src/NetAccessorTest/NetAccessorTest.cpp \
- tests/net-accessor/
+ $ cp ../upstream/tests/src/NetAccessorTest/NetAccessorTest.cpp \
+ tests/net-accessor/
-$ git apply tests/net-accessor/handle-exception-as-error.patch
+ $ git apply tests/net-accessor/handle-exception-as-error.patch
-Note that the patch is produced by the following command:
+Note that each of the above patches is produced by the following command:
-$ git diff >tests/net-accessor/handle-exception-as-error.patch
+$ git diff ><patch-path>
Create xercesc/{config.h,util/Xerces_autoconf_config.hpp} using as a base the
upstream's config.h.cmake.in, config.h.in, and
diff --git a/libxerces-c/build/root.build b/libxerces-c/build/root.build
index 92eb1cb..85934ec 100644
--- a/libxerces-c/build/root.build
+++ b/libxerces-c/build/root.build
@@ -1,6 +1,13 @@
# file : build/root.build
# license : Apache License 2.0; see accompanying LICENSE file
+# Enable network support so that the parser can download remote resources
+# (schemas, DTDs, etc).
+#
+# If enabled, then the libcurl library is used to access network resources.
+#
+config [bool] config.libxerces_c.network ?= false
+
# We rely on C99 in macro deductions (see xercesc/config.h and
# xercesc/util/Xerces_autoconf_config.hpp for details).
#
diff --git a/libxerces-c/manifest b/libxerces-c/manifest
index c40c2e6..66425d1 100644
--- a/libxerces-c/manifest
+++ b/libxerces-c/manifest
@@ -3,7 +3,7 @@ name: libxerces-c
# Note: remember to update doc-url below!
#
-version: 3.2.3+1
+version: 3.2.3+2
project: xerces-c
summary: Validating XML parsing and serialization C++ library
@@ -17,14 +17,12 @@ package-url: https://git.build2.org/cgit/packaging/xerces-c/
email: c-users@xerces.apache.org ; Mailing list.
package-email: packaging@build2.org ; Mailing list.
build-error-email: builds@build2.org
+depends: * build2 >= 0.12.0
+depends: * bpkg >= 0.12.0
-# Don't build the legacy configurations since the respective machines may not
-# have enough disk space for that.
+# @@ Disable until conditional dependencies are supported.
#
-builds: default
+#depends: libcurl ^7.67.0
-depends: * build2 >= 0.12.0
-depends: * bpkg >= 0.12.0
-depends: libcurl >= 7.67.0
-depends: libicuuc >= 65.1.0
-depends: libicui18n >= 65.1.0
+depends: libicuuc ^65.1.0
+depends: libicui18n ^65.1.0
diff --git a/libxerces-c/tests/build/root.build b/libxerces-c/tests/build/root.build
index 7bf1cba..261a20a 100644
--- a/libxerces-c/tests/build/root.build
+++ b/libxerces-c/tests/build/root.build
@@ -1,6 +1,10 @@
# file : tests/build/root.build
# license : Apache License 2.0; see accompanying LICENSE file
+# Test network support (see parent project for details).
+#
+config [bool] config.libxerces_c.network ?= false
+
cxx.std = latest
using cxx
diff --git a/libxerces-c/tests/buildfile b/libxerces-c/tests/buildfile
index f1869ed..08636f9 100644
--- a/libxerces-c/tests/buildfile
+++ b/libxerces-c/tests/buildfile
@@ -1,4 +1,6 @@
# file : tests/buildfile
# license : Apache License 2.0; see accompanying LICENSE file
-./: {*/ -build/}
+./: {*/ -build/ -net-accessor/}
+
+./: net-accessor/: include = $config.libxerces_c.network
diff --git a/libxerces-c/tests/dom-print/.gitattributes b/libxerces-c/tests/dom-print/.gitattributes
new file mode 100644
index 0000000..fe24c88
--- /dev/null
+++ b/libxerces-c/tests/dom-print/.gitattributes
@@ -0,0 +1 @@
+DOMPrint symlink=dir
diff --git a/libxerces-c/tests/net-accessor/testscript b/libxerces-c/tests/net-accessor/testscript
index 993423b..55a810a 100644
--- a/libxerces-c/tests/net-accessor/testscript
+++ b/libxerces-c/tests/net-accessor/testscript
@@ -15,7 +15,8 @@ $* 'http://www.example.com' >>~%EOO%
:
: Test that an HTTPS URL is queried successfully using libcurl via the system
: SSL backend on Windows and MacOS/Clang and fails for other targets that use
-: the OpenSSL backend by default.
+: the OpenSSL backend by default, since the CA certificates cannot be
+: located.
:
if ($cxx.target.class == 'windows' || \
$cxx.target.class == 'macos' && $cxx.id == 'clang-apple')
diff --git a/libxerces-c/tests/psvi-writer/.gitattributes b/libxerces-c/tests/psvi-writer/.gitattributes
new file mode 100644
index 0000000..3f00e13
--- /dev/null
+++ b/libxerces-c/tests/psvi-writer/.gitattributes
@@ -0,0 +1 @@
+PSVIWriter symlink=dir
diff --git a/libxerces-c/tests/sax-print/.gitattributes b/libxerces-c/tests/sax-print/.gitattributes
new file mode 100644
index 0000000..dd82b71
--- /dev/null
+++ b/libxerces-c/tests/sax-print/.gitattributes
@@ -0,0 +1 @@
+SAXPrint symlink=dir
diff --git a/libxerces-c/tests/sax2-print/.gitattributes b/libxerces-c/tests/sax2-print/.gitattributes
new file mode 100644
index 0000000..1746049
--- /dev/null
+++ b/libxerces-c/tests/sax2-print/.gitattributes
@@ -0,0 +1 @@
+SAX2Print symlink=dir
diff --git a/libxerces-c/xercesc/.gitattributes b/libxerces-c/xercesc/.gitattributes
new file mode 100644
index 0000000..84486b8
--- /dev/null
+++ b/libxerces-c/xercesc/.gitattributes
@@ -0,0 +1,7 @@
+dom symlink=dir
+framework symlink=dir
+parsers symlink=dir
+sax symlink=dir
+sax2 symlink=dir
+validators symlink=dir
+xinclude symlink=dir
diff --git a/libxerces-c/xercesc/buildfile b/libxerces-c/xercesc/buildfile
index 7124293..b3cb923 100644
--- a/libxerces-c/xercesc/buildfile
+++ b/libxerces-c/xercesc/buildfile
@@ -1,29 +1,36 @@
# file : xercesc/buildfile
# license : Apache License 2.0; see accompanying LICENSE file
+network = $config.libxerces_c.network
+
# Note that the installed util/NetAccessors/Curl/CurlURLInputStream.hpp
# includes the libcurl headers.
#
-import int_libs = libcurl%lib{curl}
+if $network
+ import intf_libs = libcurl%lib{curl}
-import imp_libs = libicuuc%lib{icuuc}
-import imp_libs += libicui18n%lib{icui18n}
+import impl_libs = libicuuc%lib{icuuc}
+import impl_libs += libicui18n%lib{icui18n}
-lib{xerces-c}: {h }{config } \
- {hxx txx cxx}{*/** -util/FileManagers/** -util/XercesVersion} \
- {hxx }{ util/XercesVersion} \
- $int_libs $imp_libs
+lib{xerces-c}: {h }{config} \
+ {hxx txx cxx}{*/** -util/FileManagers/** \
+ -util/NetAccessors/** \
+ -util/XercesVersion} \
+ {hxx }{ util/XercesVersion} \
+ $intf_libs $impl_libs
tclass = $cxx.target.class
tsys = $cxx.target.system
-windows = ($tclass == 'windows')
+windows = ($tclass == 'windows')
lib{xerces-c}: {h c}{stricmp strnicmp} \
util/FileManagers/{hxx cxx}{PosixFileMgr }: include = (!$windows)
lib{xerces-c}: util/FileManagers/{hxx cxx}{WindowsFileMgr}: include = $windows
+lib{xerces-c}: util/NetAccessors/Curl/{hxx cxx}{*}: include = $network
+
# Include the generated version header into the distribution (so that we
# don't pick up an installed one) and don't remove it when cleaning in src (so
# that clean results in a state identical to distributed).
@@ -44,6 +51,9 @@ util/hxx{XercesVersion}: util/in{XercesVersion} $src_root/manifest
# Build options.
#
+if $network
+ cc.poptions += -DXERCES_USE_NETACCESSOR_CURL=1
+
cc.poptions += -DXERCES_BUILDING_LIBRARY -DHAVE_CONFIG_H -D_THREAD_SAFE
if $windows
@@ -120,7 +130,7 @@ switch $tclass, $tsys
lib{xerces-c}:
{
cxx.export.poptions = "-I$out_root" "-I$src_root"
- cxx.export.libs = $int_libs
+ cxx.export.libs = $intf_libs
}
liba{xerces-c}: cxx.export.poptions += -DLIBXERCES_C_STATIC
diff --git a/libxerces-c/xercesc/config.h b/libxerces-c/xercesc/config.h
index 48bd880..ab2ea78 100644
--- a/libxerces-c/xercesc/config.h
+++ b/libxerces-c/xercesc/config.h
@@ -32,8 +32,14 @@
#undef XERCES_USE_TRANSCODER_WINDOWS
/* Use the Curl net accessor.
+ *
+ * Note that the XERCES_USE_NETACCESSOR_CURL macro is defined via the
+ * preprocessor option if the network support is enabled (see buildfile for
+ * details).
*/
+/*
#define XERCES_USE_NETACCESSOR_CURL 1
+*/
#undef XERCES_USE_NETACCESSOR_CFURL
#undef XERCES_USE_NETACCESSOR_SOCKET
#undef XERCES_USE_NETACCESSOR_WINSOCK
diff --git a/libxerces-c/xercesc/dtd-decl-use-after-free.patch b/libxerces-c/xercesc/dtd-decl-use-after-free.patch
new file mode 100644
index 0000000..4a48a96
--- /dev/null
+++ b/libxerces-c/xercesc/dtd-decl-use-after-free.patch
@@ -0,0 +1,20 @@
+diff --git a/libxerces-c/xercesc/internal/IGXMLScanner.cpp b/libxerces-c/xercesc/internal/IGXMLScanner.cpp
+index 0062400..aa344be 100644
+--- a/libxerces-c/xercesc/internal/IGXMLScanner.cpp
++++ b/libxerces-c/xercesc/internal/IGXMLScanner.cpp
+@@ -1532,7 +1532,6 @@ void IGXMLScanner::scanDocTypeDecl()
+ DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
+ declDTD->setSystemId(sysId);
+ declDTD->setIsExternal(true);
+- Janitor<DTDEntityDecl> janDecl(declDTD);
+
+ // Mark this one as a throw at end
+ reader->setThrowAtEnd(true);
+@@ -3095,7 +3094,6 @@ Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
+ DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
+ declDTD->setSystemId(src.getSystemId());
+ declDTD->setIsExternal(true);
+- Janitor<DTDEntityDecl> janDecl(declDTD);
+
+ // Mark this one as a throw at end
+ newReader->setThrowAtEnd(true);
diff --git a/libxerces-c/xercesc/inline-funcs-def-usage-order.patch b/libxerces-c/xercesc/inline-funcs-def-usage-order.patch
new file mode 100644
index 0000000..1c7fc0c
--- /dev/null
+++ b/libxerces-c/xercesc/inline-funcs-def-usage-order.patch
@@ -0,0 +1,38 @@
+diff --git a/libxerces-c/xercesc/util/KVStringPair.hpp b/libxerces-c/xercesc/util/KVStringPair.hpp
+index aeb787a..87e2947 100644
+--- a/libxerces-c/xercesc/util/KVStringPair.hpp
++++ b/libxerces-c/xercesc/util/KVStringPair.hpp
+@@ -163,16 +163,6 @@ inline XMLCh* KVStringPair::getValue()
+ // ---------------------------------------------------------------------------
+ // KVStringPair: Setters
+ // ---------------------------------------------------------------------------
+-inline void KVStringPair::setKey(const XMLCh* const newKey)
+-{
+- setKey(newKey, XMLString::stringLen(newKey));
+-}
+-
+-inline void KVStringPair::setValue(const XMLCh* const newValue)
+-{
+- setValue(newValue, XMLString::stringLen(newValue));
+-}
+-
+ inline void KVStringPair::setKey( const XMLCh* const newKey
+ , const XMLSize_t newKeyLength)
+ {
+@@ -201,6 +191,16 @@ inline void KVStringPair::setValue( const XMLCh* const newValue
+ memcpy(fValue, newValue, (newValueLength+1) * sizeof(XMLCh)); // len+1 because of the 0 at the end
+ }
+
++inline void KVStringPair::setKey(const XMLCh* const newKey)
++{
++ setKey(newKey, XMLString::stringLen(newKey));
++}
++
++inline void KVStringPair::setValue(const XMLCh* const newValue)
++{
++ setValue(newValue, XMLString::stringLen(newValue));
++}
++
+ inline void KVStringPair::set( const XMLCh* const newKey
+ , const XMLCh* const newValue)
+ {
diff --git a/libxerces-c/xercesc/internal b/libxerces-c/xercesc/internal
deleted file mode 120000
index 7daca54..0000000
--- a/libxerces-c/xercesc/internal
+++ /dev/null
@@ -1 +0,0 @@
-../../upstream/src/xercesc/internal \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/BinFileOutputStream.cpp b/libxerces-c/xercesc/internal/BinFileOutputStream.cpp
new file mode 120000
index 0000000..a3c04bc
--- /dev/null
+++ b/libxerces-c/xercesc/internal/BinFileOutputStream.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/BinFileOutputStream.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/BinFileOutputStream.hpp b/libxerces-c/xercesc/internal/BinFileOutputStream.hpp
new file mode 120000
index 0000000..e3d4f83
--- /dev/null
+++ b/libxerces-c/xercesc/internal/BinFileOutputStream.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/BinFileOutputStream.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/BinMemOutputStream.cpp b/libxerces-c/xercesc/internal/BinMemOutputStream.cpp
new file mode 120000
index 0000000..81d404e
--- /dev/null
+++ b/libxerces-c/xercesc/internal/BinMemOutputStream.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/BinMemOutputStream.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/BinMemOutputStream.hpp b/libxerces-c/xercesc/internal/BinMemOutputStream.hpp
new file mode 120000
index 0000000..0d12574
--- /dev/null
+++ b/libxerces-c/xercesc/internal/BinMemOutputStream.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/BinMemOutputStream.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/CharTypeTables.hpp b/libxerces-c/xercesc/internal/CharTypeTables.hpp
new file mode 120000
index 0000000..252ff31
--- /dev/null
+++ b/libxerces-c/xercesc/internal/CharTypeTables.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/CharTypeTables.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/DGXMLScanner.cpp b/libxerces-c/xercesc/internal/DGXMLScanner.cpp
new file mode 120000
index 0000000..174dc3a
--- /dev/null
+++ b/libxerces-c/xercesc/internal/DGXMLScanner.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/DGXMLScanner.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/DGXMLScanner.hpp b/libxerces-c/xercesc/internal/DGXMLScanner.hpp
new file mode 120000
index 0000000..a86e666
--- /dev/null
+++ b/libxerces-c/xercesc/internal/DGXMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/DGXMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ElemStack.cpp b/libxerces-c/xercesc/internal/ElemStack.cpp
new file mode 120000
index 0000000..8179d5a
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ElemStack.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ElemStack.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ElemStack.hpp b/libxerces-c/xercesc/internal/ElemStack.hpp
new file mode 120000
index 0000000..f23fbff
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ElemStack.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ElemStack.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/EndOfEntityException.hpp b/libxerces-c/xercesc/internal/EndOfEntityException.hpp
new file mode 120000
index 0000000..df53197
--- /dev/null
+++ b/libxerces-c/xercesc/internal/EndOfEntityException.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/EndOfEntityException.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/IANAEncodings.hpp b/libxerces-c/xercesc/internal/IANAEncodings.hpp
new file mode 120000
index 0000000..eff0e97
--- /dev/null
+++ b/libxerces-c/xercesc/internal/IANAEncodings.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/IANAEncodings.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/IGXMLScanner.cpp b/libxerces-c/xercesc/internal/IGXMLScanner.cpp
new file mode 100644
index 0000000..aa344be
--- /dev/null
+++ b/libxerces-c/xercesc/internal/IGXMLScanner.cpp
@@ -0,0 +1,3269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * $Id$
+ */
+
+// ---------------------------------------------------------------------------
+// Includes
+// ---------------------------------------------------------------------------
+#include <xercesc/internal/IGXMLScanner.hpp>
+#include <xercesc/util/RuntimeException.hpp>
+#include <xercesc/util/UnexpectedEOFException.hpp>
+#include <xercesc/sax/InputSource.hpp>
+#include <xercesc/framework/XMLDocumentHandler.hpp>
+#include <xercesc/framework/XMLEntityHandler.hpp>
+#include <xercesc/framework/XMLPScanToken.hpp>
+#include <xercesc/internal/EndOfEntityException.hpp>
+#include <xercesc/framework/MemoryManager.hpp>
+#include <xercesc/framework/XMLGrammarPool.hpp>
+#include <xercesc/framework/XMLDTDDescription.hpp>
+#include <xercesc/framework/psvi/PSVIElement.hpp>
+#include <xercesc/framework/psvi/PSVIHandler.hpp>
+#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
+#include <xercesc/validators/common/GrammarResolver.hpp>
+#include <xercesc/validators/DTD/DocTypeHandler.hpp>
+#include <xercesc/validators/DTD/DTDScanner.hpp>
+#include <xercesc/validators/DTD/DTDValidator.hpp>
+#include <xercesc/validators/schema/SchemaValidator.hpp>
+#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
+#include <xercesc/validators/schema/identity/IC_Selector.hpp>
+#include <xercesc/util/OutOfMemoryException.hpp>
+
+XERCES_CPP_NAMESPACE_BEGIN
+
+
+typedef JanitorMemFunCall<IGXMLScanner> CleanupType;
+typedef JanitorMemFunCall<ReaderMgr> ReaderMgrResetType;
+
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Constructors and Destructor
+// ---------------------------------------------------------------------------
+IGXMLScanner::IGXMLScanner( XMLValidator* const valToAdopt
+ , GrammarResolver* const grammarResolver
+ , MemoryManager* const manager) :
+
+ XMLScanner(valToAdopt, grammarResolver, manager)
+ , fSeeXsi(false)
+ , fGrammarType(Grammar::UnKnown)
+ , fElemStateSize(16)
+ , fElemState(0)
+ , fElemLoopState(0)
+ , fContent(1023, manager)
+ , fRawAttrList(0)
+ , fRawAttrColonListSize(32)
+ , fRawAttrColonList(0)
+ , fDTDValidator(0)
+ , fSchemaValidator(0)
+ , fDTDGrammar(0)
+ , fICHandler(0)
+ , fLocationPairs(0)
+ , fDTDElemNonDeclPool(0)
+ , fSchemaElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
+ , fPSVIAttrList(0)
+ , fModel(0)
+ , fPSVIElement(0)
+ , fErrorStack(0)
+ , fSchemaInfoList(0)
+ , fCachedSchemaInfoList (0)
+{
+ CleanupType cleanup(this, &IGXMLScanner::cleanUp);
+
+ try
+ {
+ commonInit();
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // Don't cleanup when out of memory, since executing the
+ // code can cause problems.
+ cleanup.release();
+
+ throw;
+ }
+
+ cleanup.release();
+}
+
+IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
+ , DocTypeHandler* const docTypeHandler
+ , XMLEntityHandler* const entityHandler
+ , XMLErrorReporter* const errHandler
+ , XMLValidator* const valToAdopt
+ , GrammarResolver* const grammarResolver
+ , MemoryManager* const manager) :
+
+ XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
+ , fSeeXsi(false)
+ , fGrammarType(Grammar::UnKnown)
+ , fElemStateSize(16)
+ , fElemState(0)
+ , fElemLoopState(0)
+ , fContent(1023, manager)
+ , fRawAttrList(0)
+ , fRawAttrColonListSize(32)
+ , fRawAttrColonList(0)
+ , fDTDValidator(0)
+ , fSchemaValidator(0)
+ , fDTDGrammar(0)
+ , fICHandler(0)
+ , fLocationPairs(0)
+ , fDTDElemNonDeclPool(0)
+ , fSchemaElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
+ , fPSVIAttrList(0)
+ , fModel(0)
+ , fPSVIElement(0)
+ , fErrorStack(0)
+ , fSchemaInfoList(0)
+ , fCachedSchemaInfoList (0)
+{
+ CleanupType cleanup(this, &IGXMLScanner::cleanUp);
+
+ try
+ {
+ commonInit();
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // Don't cleanup when out of memory, since executing the
+ // code can cause problems.
+ cleanup.release();
+
+ throw;
+ }
+
+ cleanup.release();
+}
+
+IGXMLScanner::~IGXMLScanner()
+{
+ cleanUp();
+}
+
+// ---------------------------------------------------------------------------
+// XMLScanner: Getter methods
+// ---------------------------------------------------------------------------
+NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
+{
+ if(!fDTDGrammar)
+ return 0;
+ return fDTDGrammar->getEntityDeclPool();
+}
+
+const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
+{
+ if(!fDTDGrammar)
+ return 0;
+ return fDTDGrammar->getEntityDeclPool();
+}
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Main entry point to scan a document
+// ---------------------------------------------------------------------------
+void IGXMLScanner::scanDocument(const InputSource& src)
+{
+ // Bump up the sequence id for this parser instance. This will invalidate
+ // any previous progressive scan tokens.
+ fSequenceId++;
+
+ ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset); // guard tied to ReaderMgr::reset on fReaderMgr; it is only release()d on the out-of-memory paths below
+
+ try
+ {
+ // Reset the scanner and its plugged in stuff for a new run. This
+ // resets all the data structures, creates the initial reader and
+ // pushes it on the stack, and sets up the base document path.
+ scanReset(src);
+
+ // If we have a document handler, then call the start document
+ if (fDocHandler)
+ fDocHandler->startDocument();
+
+ // Scan the prolog part, which is everything before the root element
+ // including the DTD subsets.
+ scanProlog();
+
+ // If we got to the end of input, then it's not a valid XML file.
+ // Else, go on to scan the content.
+ if (fReaderMgr.atEOF())
+ {
+ emitError(XMLErrs::EmptyMainEntity);
+ }
+ else
+ {
+ // Scan the document content (note: scanContent() takes no arguments)
+ if (scanContent())
+ {
+ // Do post-parse validation if required
+ if (fValidate)
+ {
+ // We handle ID reference semantics at this level since
+ // it's required by XML 1.0.
+ checkIDRefs();
+
+ // Then allow the validator to do any extra stuff it wants
+// fValidator->postParseValidation();
+ }
+
+ // That went ok, so scan for any miscellaneous stuff
+ if (!fReaderMgr.atEOF())
+ scanMiscellaneous();
+ }
+ }
+
+ // If we have a document handler, then call the end document
+ if (fDocHandler)
+ fDocHandler->endDocument();
+
+ //cargill debug:
+ //fGrammarResolver->getXSModel();
+ }
+ // NOTE:
+ //
+ // In all of the error processing below, the emitError() call MUST come
+ // before the flush of the reader mgr, or it will fail because it tries
+ // to find out the position in the XML source of the error.
+ catch(const XMLErrs::Codes)
+ {
+ // This is a 'first failure' exception, so fall through
+ }
+ catch(const XMLValid::Codes)
+ {
+ // This is a 'first fatal error' type exit, so fall through
+ }
+ catch(const XMLException& excToCatch)
+ {
+ // Emit the error and catch any user exception thrown from here. Make
+ // sure in all cases we flush the reader manager.
+ fInException = true;
+ try
+ {
+ if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) // map the exception's error type onto the matching XMLErrs wrapper code
+ emitError
+ (
+ XMLErrs::XMLException_Warning
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
+ emitError
+ (
+ XMLErrs::XMLException_Fatal
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else
+ emitError
+ (
+ XMLErrs::XMLException_Error
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw; // propagate the out-of-memory condition raised while reporting
+ }
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw; // propagate the out-of-memory condition to the caller
+ }
+}
+
+
+bool IGXMLScanner::scanNext(XMLPScanToken& token)
+{
+ // Make sure this token is still legal
+ if (!isLegalToken(token))
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
+
+ // Find the next token and remember the reader id
+ XMLSize_t orgReader;
+ XMLTokens curToken;
+
+ ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset); // guard tied to ReaderMgr::reset; release()d at the end when more input remains
+
+ bool retVal = true; // returned to the caller; set to false on EOF or on any caught error
+
+ try
+ {
+ while (true)
+ {
+ // We have to handle any end of entity exceptions that happen here.
+ // We could be at the end of X nested entities, each of which will
+ // generate an end of entity exception as we try to move forward.
+ try
+ {
+ curToken = senseNextToken(orgReader);
+ break;
+ }
+ catch(const EndOfEntityException& toCatch)
+ {
+ // Send an end of entity reference event
+ if (fDocHandler)
+ fDocHandler->endEntityReference(toCatch.getEntity());
+ }
+ }
+
+ if (curToken == Token_CharData)
+ {
+ scanCharData(fCDataBuf);
+ }
+ else if (curToken == Token_EOF)
+ {
+ if (!fElemStack.isEmpty()) // input ended while elements were still open
+ {
+ const ElemStack::StackElem* topElem = fElemStack.popTop();
+ emitError
+ (
+ XMLErrs::EndedWithTagsOnStack
+ , topElem->fThisElement->getFullName()
+ );
+ }
+
+ retVal = false;
+ }
+ else
+ {
+ // It's some sort of markup
+ bool gotData = true; // cleared by scanEndTag/scanStartTag* through their bool& argument
+ switch(curToken)
+ {
+ case Token_CData :
+ // Make sure we are within content
+ if (fElemStack.isEmpty())
+ emitError(XMLErrs::CDATAOutsideOfContent);
+ scanCDSection();
+ break;
+
+ case Token_Comment :
+ scanComment();
+ break;
+
+ case Token_EndTag :
+ scanEndTag(gotData);
+ break;
+
+ case Token_PI :
+ scanPI();
+ break;
+
+ case Token_StartTag :
+ if (fDoNamespaces)
+ scanStartTagNS(gotData);
+ else
+ scanStartTag(gotData);
+ break;
+
+ default :
+ fReaderMgr.skipToChar(chOpenAngle); // unrecognized token: skip ahead to the next '<'
+ break;
+ }
+
+ if (orgReader != fReaderMgr.getCurrentReaderNum()) // markup ended on a different reader than it started on
+ emitError(XMLErrs::PartialMarkupInEntity);
+
+ // If we hit the end, then do the miscellaneous part
+ if (!gotData)
+ {
+ // Do post-parse validation if required
+ if (fValidate)
+ {
+ // We handle ID reference semantics at this level since
+ // it's required by XML 1.0.
+ checkIDRefs();
+
+ // Then allow the validator to do any extra stuff it wants
+// fValidator->postParseValidation();
+ }
+
+ // That went ok, so scan for any miscellaneous stuff
+ scanMiscellaneous();
+
+ if (toCheckIdentityConstraint())
+ fICHandler->endDocument();
+
+ if (fDocHandler)
+ fDocHandler->endDocument();
+ }
+ }
+ }
+ // NOTE:
+ //
+ // In all of the error processing below, the emitError() call MUST come
+ // before the flush of the reader mgr, or it will fail because it tries
+ // to find out the position in the XML source of the error.
+ catch(const XMLErrs::Codes)
+ {
+ // This is a 'first failure' exception so return failure
+ retVal = false;
+ }
+ catch(const XMLValid::Codes)
+ {
+ // This is a 'first fatal error' type exit, so return failure
+ retVal = false;
+ }
+ catch(const XMLException& excToCatch)
+ {
+ // Emit the error and catch any user exception thrown from here. Make
+ // sure in all cases we flush the reader manager.
+ fInException = true;
+ try
+ {
+ if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) // map the exception's error type onto the matching XMLErrs wrapper code
+ emitError
+ (
+ XMLErrs::XMLException_Warning
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
+ emitError
+ (
+ XMLErrs::XMLException_Fatal
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else
+ emitError
+ (
+ XMLErrs::XMLException_Error
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw; // propagate the out-of-memory condition raised while reporting
+ }
+
+ retVal = false;
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw; // propagate the out-of-memory condition to the caller
+ }
+
+ // If we are not at the end, release the object that will
+ // reset the ReaderMgr.
+ if (retVal)
+ resetReaderMgr.release();
+
+ return retVal; // true: the caller may call scanNext() again; false: EOF or error
+}
+
+
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Private helper methods. Most of these are implemented in
+// IGXMLScanner2.Cpp.
+// ---------------------------------------------------------------------------
+
+// This method handles the common initialization, to avoid having to do
+// it redundantly in multiple constructors.
+void IGXMLScanner::commonInit()
+{
+ // Element state bookkeeping: two arrays of fElemStateSize entries each,
+ // both obtained from the scanner's memory manager.
+ fElemState = static_cast<unsigned int*>
+ (
+ fMemoryManager->allocate(fElemStateSize * sizeof(unsigned int))
+ );
+ fElemLoopState = static_cast<unsigned int*>
+ (
+ fMemoryManager->allocate(fElemStateSize * sizeof(unsigned int))
+ );
+
+ // Storage for the raw attribute scan: the unprocessed key/value string
+ // pairs, plus an int array sized by fRawAttrColonListSize.
+ fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>
+ (
+ 32, true, fMemoryManager
+ );
+ fRawAttrColonList = static_cast<int*>
+ (
+ fMemoryManager->allocate(fRawAttrColonListSize * sizeof(int))
+ );
+
+ // Both validators are created and initialized up front.
+ fDTDValidator = new (fMemoryManager) DTDValidator();
+ initValidator(fDTDValidator);
+ fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
+ initValidator(fSchemaValidator);
+
+ // Identity constraint handling
+ fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
+
+ // schemaLocation pair storage
+ fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
+
+ // Pools for elements that were used without being declared
+ fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>
+ (
+ 29, 128, fMemoryManager
+ );
+ fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>
+ (
+ 29, true, 128, fMemoryManager
+ );
+
+ // Attribute registries and the PSVI attribute list
+ fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>(131, false, fMemoryManager);
+ fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
+ fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
+
+ // Schema info tables (current and cached)
+ fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
+ fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
+
+ // Fall back to the DTD validator when no validator has been set.
+ if (fValidator == 0)
+ fValidator = fDTDValidator;
+}
+
+void IGXMLScanner::cleanUp() // releases the storage set up by commonInit(), plus fPSVIElement and fErrorStack
+{
+ fMemoryManager->deallocate(fElemState); //delete [] fElemState;
+ fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
+ delete fRawAttrList;
+ fMemoryManager->deallocate(fRawAttrColonList); // raw array obtained via fMemoryManager->allocate, hence deallocate rather than delete
+ delete fDTDValidator;
+ delete fSchemaValidator;
+ delete fICHandler;
+ delete fLocationPairs;
+ delete fDTDElemNonDeclPool;
+ delete fSchemaElemNonDeclPool;
+ delete fAttDefRegistry;
+ delete fUndeclaredAttrRegistry;
+ delete fPSVIAttrList;
+ delete fPSVIElement; // not created in commonInit(); presumably allocated elsewhere - TODO confirm
+ delete fErrorStack; // not created in commonInit(); presumably allocated elsewhere - TODO confirm
+ delete fSchemaInfoList;
+ delete fCachedSchemaInfoList;
+}
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Private scanning methods
+// ---------------------------------------------------------------------------
+
+// This method is called from scanStartTag() to handle the very raw initial
+// scan of the attributes. It just fills in the passed collection with
+// key/value pairs for each attribute. No processing is done on them at all.
+XMLSize_t
+IGXMLScanner::rawAttrScan(const XMLCh* const elemName
+ , RefVectorOf<KVStringPair>& toFill
+ , bool& isEmpty)
+{
+ // Keep up with how many attributes we've seen so far, and how many
+ // elements are available in the vector. This way we can reuse old
+ // elements until we run out and then expand it.
+ XMLSize_t attCount = 0; // also the return value on every exit path
+ XMLSize_t curVecSize = toFill.size();
+
+ // Assume it is not empty
+ isEmpty = false;
+
+ // We loop until we either see a /> or >, handling key/value pairs until
+ // we get there. We place them in the passed vector, which we will expand
+ // as required to hold them.
+ while (true)
+ {
+ // Get the next character, which should be non-space
+ XMLCh nextCh = fReaderMgr.peekNextChar();
+
+ // If the next character is not a slash or closed angle bracket,
+ // then it must be whitespace, since whitespace is required
+ // between the end of the last attribute and the name of the next
+ // one.
+ //
+ if (attCount) // only enforced once at least one attribute has been stored
+ {
+ if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
+ {
+ bool bFoundSpace;
+ fReaderMgr.skipPastSpaces(bFoundSpace);
+ if (!bFoundSpace)
+ {
+ // Emit the error but keep on going
+ emitError(XMLErrs::ExpectedWhitespace);
+ }
+ // Ok, peek another char
+ nextCh = fReaderMgr.peekNextChar();
+ }
+ }
+
+ // Ok, here we first check for any of the special case characters.
+ // If it's not one, then we do the normal case processing, which
+ // assumes that we've hit an attribute value. Otherwise, we do all
+ // the special case checks.
+ if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
+ {
+ // Assume it's going to be an attribute, so get a name from
+ // the input.
+ int colonPosition; // filled in by getQName(); stored in fRawAttrColonList below
+ if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
+ {
+ if (fAttNameBuf.isEmpty())
+ emitError(XMLErrs::ExpectedAttrName);
+ else
+ emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
+ fReaderMgr.skipPastChar(chCloseAngle); // give up on this tag: skip past its '>' and return what we have
+ return attCount;
+ }
+
+ const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
+
+ // And next must be an equal sign
+ if (!scanEq())
+ {
+ static const XMLCh tmpList[] =
+ {
+ chSingleQuote, chDoubleQuote, chCloseAngle
+ , chOpenAngle, chForwardSlash, chNull
+ };
+
+ emitError(XMLErrs::ExpectedEqSign);
+
+ // Try to sync back up by skipping forward until we either
+ // hit something meaningful.
+ const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
+
+ if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
+ {
+ // Jump back to top for normal processing of these
+ continue;
+ }
+ else if ((chFound == chSingleQuote)
+ || (chFound == chDoubleQuote)
+ || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
+ {
+ // Just fall through assuming that the value is to follow
+ }
+ else if (chFound == chOpenAngle)
+ {
+ // Assume a malformed tag and that new one is starting
+ emitError(XMLErrs::UnterminatedStartTag, elemName);
+ return attCount;
+ }
+ else
+ {
+ // Something went really wrong
+ return attCount;
+ }
+ }
+
+ // Next should be the quoted attribute value. We just do a simple
+ // and stupid scan of this value. The only thing we do here
+ // is to expand entity references.
+ if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
+ {
+ static const XMLCh tmpList[] =
+ {
+ chCloseAngle, chOpenAngle, chForwardSlash, chNull
+ };
+
+ emitError(XMLErrs::ExpectedAttrValue);
+
+ // It failed, so lets try to get synced back up. We skip
+ // forward until we find some whitespace or one of the
+ // chars in our list.
+ const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
+
+ if ((chFound == chCloseAngle)
+ || (chFound == chForwardSlash)
+ || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
+ {
+ // Just fall through and process this attribute, though
+ // the value will be "".
+ }
+ else if (chFound == chOpenAngle)
+ {
+ // Assume a malformed tag and that new one is starting
+ emitError(XMLErrs::UnterminatedStartTag, elemName);
+ return attCount;
+ }
+ else
+ {
+ // Something went really wrong
+ return attCount;
+ }
+ }
+
+ // And now lets add it to the passed collection. If we have not
+ // filled it up yet, then we use the next element. Else we add
+ // a new one.
+ KVStringPair* curPair = 0;
+ if (attCount >= curVecSize) // no reusable slot left: allocate a new pair and append it
+ {
+ curPair = new (fMemoryManager) KVStringPair
+ (
+ curAttNameBuf
+ , fAttNameBuf.getLen()
+ , fAttValueBuf.getRawBuffer()
+ , fAttValueBuf.getLen()
+ , fMemoryManager
+ );
+ toFill.addElement(curPair);
+ }
+ else
+ {
+ curPair = toFill.elementAt(attCount); // reuse the existing pair at this index
+ curPair->set
+ (
+ curAttNameBuf,
+ fAttNameBuf.getLen(),
+ fAttValueBuf.getRawBuffer(),
+ fAttValueBuf.getLen()
+ );
+ }
+
+ if (attCount >= fRawAttrColonListSize) { // grow the colon-position array before writing past its size
+ resizeRawAttrColonList();
+ }
+ // Set the position of the colon and bump the count of attributes we've gotten
+ fRawAttrColonList[attCount++] = colonPosition;
+
+ // And go to the top again for another attribute
+ continue;
+ }
+
+ // It was some special case character so do all of the checks and
+ // deal with it.
+ if (!nextCh) // a null character from the reader is treated as unexpected end of input
+ ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
+
+ if (nextCh == chForwardSlash)
+ {
+ fReaderMgr.getNextChar(); // consume the '/'
+ isEmpty = true;
+ if (!fReaderMgr.skippedChar(chCloseAngle))
+ emitError(XMLErrs::UnterminatedStartTag, elemName);
+ break;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ fReaderMgr.getNextChar(); // consume the '>'
+ break;
+ }
+ else if (nextCh == chOpenAngle)
+ {
+ // Check for this one specially, since it's going to be common
+ // and it is kind of auto-recovering since we've already hit the
+ // next open bracket, which is what we would have seeked to (and
+ // skipped this whole tag.)
+ emitError(XMLErrs::UnterminatedStartTag, elemName);
+ break;
+ }
+ else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
+ {
+ // Check for this one specially, which is probably a missing
+ // attribute name, e.g. ="value". Just issue expected name
+ // error and eat the quoted string, then jump back to the
+ // top again.
+ emitError(XMLErrs::ExpectedAttrName);
+ fReaderMgr.getNextChar();
+ fReaderMgr.skipQuotedString(nextCh);
+ fReaderMgr.skipPastSpaces();
+ continue;
+ }
+ }
+
+ return attCount; // number of attributes stored in toFill for this tag
+}
+
+
+// This method will kick off the scanning of the primary content of the
+// document, i.e. the elements.
+bool IGXMLScanner::scanContent()
+{
+ // Go into a loop until we hit the end of the root element, or we fall
+ // out because there is no root element.
+ //
+ // We have to do kind of a deeply nested double loop here in order to
+ // avoid doing the setup/teardown of the exception handler on each
+ // round. Doing it this way we only do it when an exception actually
+ // occurs.
+ bool gotData = true; // cleared on EOF or by scanEndTag/scanStartTag* through their bool& argument
+ bool inMarkup = false; // true while a markup construct is being scanned; read by the catch handler below
+ while (gotData)
+ {
+ try
+ {
+ while (gotData)
+ {
+ // Sense what the next top level token is. According to what
+ // this tells us, we will call something to handle that kind
+ // of thing.
+ XMLSize_t orgReader;
+ const XMLTokens curToken = senseNextToken(orgReader);
+
+ // Handle character data and end of file specially. Char data
+ // is not markup so we don't want to handle it in the loop
+ // below.
+ if (curToken == Token_CharData)
+ {
+ // Scan the character data and call appropriate events. Let
+ // him use our local character data buffer for efficiency.
+ scanCharData(fCDataBuf);
+ continue;
+ }
+ else if (curToken == Token_EOF)
+ {
+ // The element stack better be empty at this point or we
+ // ended prematurely before all elements were closed.
+ if (!fElemStack.isEmpty())
+ {
+ const ElemStack::StackElem* topElem = fElemStack.popTop();
+ emitError
+ (
+ XMLErrs::EndedWithTagsOnStack
+ , topElem->fThisElement->getFullName()
+ );
+ }
+
+ // It's the end of file, so clear the got data flag
+ gotData = false;
+ continue;
+ }
+
+ // We are in some sort of markup now
+ inMarkup = true;
+
+ // According to the token we got, call the appropriate
+ // scanning method.
+ switch(curToken)
+ {
+ case Token_CData :
+ // Make sure we are within content
+ if (fElemStack.isEmpty())
+ emitError(XMLErrs::CDATAOutsideOfContent);
+ scanCDSection();
+ break;
+
+ case Token_Comment :
+ scanComment();
+ break;
+
+ case Token_EndTag :
+ scanEndTag(gotData);
+ break;
+
+ case Token_PI :
+ scanPI();
+ break;
+
+ case Token_StartTag :
+ if (fDoNamespaces)
+ scanStartTagNS(gotData);
+ else
+ scanStartTag(gotData);
+ break;
+
+ default :
+ fReaderMgr.skipToChar(chOpenAngle); // unrecognized token: skip ahead to the next '<'
+ break;
+ }
+
+ if (orgReader != fReaderMgr.getCurrentReaderNum()) // markup ended on a different reader than it started on
+ emitError(XMLErrs::PartialMarkupInEntity);
+
+ // And we are back out of markup again
+ inMarkup = false;
+ }
+ }
+ catch(const EndOfEntityException& toCatch)
+ {
+ // If we were in some markup when this happened, then it's a
+ // partial markup error.
+ if (inMarkup)
+ emitError(XMLErrs::PartialMarkupInEntity);
+
+ // Send an end of entity reference event
+ if (fDocHandler)
+ fDocHandler->endEntityReference(toCatch.getEntity());
+
+ inMarkup = false; // the outer while loop then re-enters the try block and resumes scanning
+ }
+ }
+
+ // It went ok, so return success
+ return true; // the only return in this function; other failures leave via exceptions
+}
+
+
+void IGXMLScanner::scanEndTag(bool& gotData)
+{
+ // Assume we will still have data until proven otherwise. It will only
+ // ever be false if this is the end of the root element.
+ gotData = true;
+
+ // Check if the element stack is empty. If so, then this is an unbalanced
+ // element (i.e. more ends than starts, perhaps because of bad text
+ // causing one to be skipped.)
+ if (fElemStack.isEmpty())
+ {
+ emitError(XMLErrs::MoreEndThanStartTags);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
+ }
+
+ // Pop the stack of the element we are supposed to be ending. Remember
+ // that we don't own this. The stack just keeps them and reuses them.
+ unsigned int uriId = (fDoNamespaces) // captured here, before the element is popped further down
+ ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
+
+ // these get initialized below
+ const ElemStack::StackElem* topElem = 0;
+ const XMLCh *elemName = 0;
+
+ // Make sure that it's the end of the element that we expect
+ // special case for schema validation, whose element decls,
+ // obviously don't contain prefix information
+ if(fGrammarType == Grammar::SchemaGrammarType)
+ {
+ elemName = fElemStack.getCurrentSchemaElemName();
+ topElem = fElemStack.topElement();
+ }
+ else
+ {
+ topElem = fElemStack.topElement();
+ elemName = topElem->fThisElement->getFullName();
+ }
+ if (!fReaderMgr.skippedStringLong(elemName)) // the input must continue with the expected element name
+ {
+ emitError
+ (
+ XMLErrs::ExpectedEndOfTagX
+ , elemName
+ );
+ fReaderMgr.skipPastChar(chCloseAngle);
+ fElemStack.popTop(); // drop the open element anyway and return without validation or callbacks
+ return;
+ }
+
+ // Make sure we are back on the same reader as where we started
+ if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
+ emitError(XMLErrs::PartialTagMarkupError);
+
+ // Skip optional whitespace
+ fReaderMgr.skipPastSpaces();
+
+ // Make sure we find the closing bracket
+ if (!fReaderMgr.skippedChar(chCloseAngle))
+ {
+ emitError
+ (
+ XMLErrs::UnterminatedEndTag
+ , topElem->fThisElement->getFullName()
+ );
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType)
+ {
+ // reset error occurred
+ fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
+ if (fValidate && topElem->fThisElement->isDeclared())
+ {
+ fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
+ if(!fPSVIElemContext.fCurrentTypeInfo) // no type info: use the validator's current datatype validator instead
+ fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
+ else
+ fPSVIElemContext.fCurrentDV = 0;
+ if(fPSVIHandler)
+ {
+ fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
+
+ if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString)) // an empty normalized value is stored as null
+ fPSVIElemContext.fNormalizedValue = 0;
+ }
+ }
+ else
+ {
+ fPSVIElemContext.fCurrentDV = 0;
+ fPSVIElemContext.fCurrentTypeInfo = 0;
+ fPSVIElemContext.fNormalizedValue = 0;
+ }
+ }
+
+ // If validation is enabled, then lets pass him the list of children and
+ // this element and let him validate it.
+ DatatypeValidator* psviMemberType = 0; // set below only for a Union datatype validator; passed to endElementPSVI()
+ if (fValidate)
+ {
+
+ //
+ // XML1.0-3rd
+ // Validity Constraint:
+ // The declaration matches EMPTY and the element has no content (not even
+ // entity references, comments, PIs or white space).
+ //
+ if ( (fGrammarType == Grammar::DTDGrammarType) &&
+ (topElem->fCommentOrPISeen) &&
+ (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
+ {
+ fValidator->emitError
+ (
+ XMLValid::EmptyElemHasContent
+ , topElem->fThisElement->getFullName()
+ );
+ }
+
+ //
+ // XML1.0-3rd
+ // Validity Constraint:
+ //
+ // The declaration matches children and the sequence of child elements
+ // belongs to the language generated by the regular expression in the
+ // content model, with optional white space, comments and PIs
+ // (i.e. markup matching production [27] Misc) between the start-tag and
+ // the first child element, between child elements, or between the last
+ // child element and the end-tag.
+ //
+ // Note that
+ // a CDATA section containing only white space or
+ // a reference to an entity whose replacement text is character references
+ // expanding to white space do not match the nonterminal S, and hence
+ // cannot appear in these positions; however,
+ // a reference to an internal entity with a literal value consisting
+ // of character references expanding to white space does match S,
+ // since its replacement text is the white space resulting from expansion
+ // of the character references.
+ //
+ if ( (fGrammarType == Grammar::DTDGrammarType) &&
+ (topElem->fReferenceEscaped) &&
+ (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElemChildrenHasInvalidWS
+ , topElem->fThisElement->getFullName()
+ );
+ }
+ XMLSize_t failure; // passed to checkContent by address; used below as the index of the offending child
+ bool res = fValidator->checkContent
+ (
+ topElem->fThisElement
+ , topElem->fChildren
+ , topElem->fChildCount
+ , &failure
+ );
+
+ if (!res)
+ {
+ // One of the elements is not valid for the content. NOTE that
+ // if no children were provided but the content model requires
+ // them, it comes back with a zero value. But we cannot use that
+ // to index the child array in this case, and have to put out a
+ // special message.
+ if (!topElem->fChildCount)
+ {
+ fValidator->emitError
+ (
+ XMLValid::EmptyNotValidForContent
+ , topElem->fThisElement->getFormattedContentModel()
+ );
+ }
+ else if (failure >= topElem->fChildCount) // index is past the last child: reported as too few elements
+ {
+ fValidator->emitError
+ (
+ XMLValid::NotEnoughElemsForCM
+ , topElem->fThisElement->getFormattedContentModel()
+ );
+ }
+ else
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotValidForContent
+ , topElem->fChildren[failure]->getRawName()
+ , topElem->fThisElement->getFormattedContentModel()
+ );
+ }
+ }
+
+
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+ if (((SchemaValidator*) fValidator)->getErrorOccurred())
+ fPSVIElemContext.fErrorOccurred = true;
+ else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
+ psviMemberType = fValidationContext->getValidatingMemberType();
+
+ if (fPSVIHandler)
+ {
+ fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
+ if(fPSVIElemContext.fIsSpecified) // in this case the normalized value is replaced by the declaration's default value
+ fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
+ }
+
+ // call matchers and de-activate context
+ if (toCheckIdentityConstraint())
+ {
+ fICHandler->deactivateContext
+ (
+ (SchemaElementDecl *) topElem->fThisElement
+ , fContent.getRawBuffer()
+ , fValidationContext
+ , fPSVIElemContext.fCurrentDV
+ );
+ }
+
+ }
+ }
+
+ // QName dv needed topElem to resolve URIs on the checkContent
+ fElemStack.popTop(); // topElem is still dereferenced below; per the comment near the top, the stack keeps and reuses its entries
+
+ // See if it was the root element, to avoid multiple calls below
+ const bool isRoot = fElemStack.isEmpty();
+
+ if (fGrammarType == Grammar::SchemaGrammarType)
+ {
+ if (fPSVIHandler)
+ {
+ endElementPSVI(
+ (SchemaElementDecl*)topElem->fThisElement, psviMemberType);
+ }
+ // now we can reset the datatype buffer, since the
+ // application has had a chance to copy the characters somewhere else
+ ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
+ }
+
+ // If we have a doc handler, tell it about the end tag
+ if (fDocHandler)
+ {
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+ if (topElem->fPrefixColonPos != -1) // -1 means no prefix; otherwise the prefix is the first fPrefixColonPos characters of elemName
+ fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
+ else
+ fPrefixBuf.reset();
+ }
+ else {
+ fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
+ }
+ fDocHandler->endElement
+ (
+ *topElem->fThisElement
+ , uriId
+ , isRoot
+ , fPrefixBuf.getRawBuffer()
+ );
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+ if (!isRoot)
+ {
+ // update error information: the pushed flag is (stack non-empty && popped value) || this element's error flag
+ fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
+
+
+ }
+ }
+
+ // If this was the root, then done with content
+ gotData = !isRoot;
+
+ if (gotData) {
+ if (fDoNamespaces) {
+ // Restore the grammar
+ fGrammar = fElemStack.getCurrentGrammar();
+ fGrammarType = fGrammar->getGrammarType();
+ if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { // the current validator cannot handle the restored grammar type
+ if (fValidatorFromUser)
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
+ else {
+ fValidator = fSchemaValidator;
+ }
+ }
+ else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
+ if (fValidatorFromUser)
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
+ else {
+ fValidator = fDTDValidator;
+ }
+ }
+
+ fValidator->setGrammar(fGrammar);
+ }
+
+ // Restore the validation flag
+ fValidate = fElemStack.getValidationFlag();
+ }
+}
+
+
+// This method handles the high level logic of scanning the DOCType
+// declaration. This calls the DTDScanner and kicks off both the scanning of
+// the internal subset and the scanning of the external subset, if any.
+//
+// When we get here the '<!DOCTYPE' part has already been scanned, which is
+// what told us that we had a doc type decl to parse.
+void IGXMLScanner::scanDocTypeDecl()
+{
+ // We have a doc type. So, switch the Grammar.
+ switchGrammar(XMLUni::fgDTDEntityString);
+
+ if (fDocTypeHandler)
+ fDocTypeHandler->resetDocType();
+
+ // There must be some space after DOCTYPE
+ bool skippedSomething;
+ fReaderMgr.skipPastSpaces(skippedSomething);
+ if (!skippedSomething)
+ {
+ emitError(XMLErrs::ExpectedWhitespace);
+
+ // Just skip the Doctype declaration and return
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Get a buffer for the root element
+ XMLBufBid bbRootName(&fBufMgr);
+
+ // Get a name from the input, which should be the name of the root
+ // element of the upcoming content.
+ int colonPosition;
+ bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
+ fReaderMgr.getName(bbRootName.getBuffer());
+ if (!validName)
+ {
+ if (bbRootName.isEmpty())
+ emitError(XMLErrs::NoRootElemInDOCTYPE);
+ else
+ emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Store the root element name for later check
+ setRootElemName(bbRootName.getRawBuffer());
+
+ // This element obviously is not going to exist in the element decl
+ // pool yet, but we need to call docTypeDecl. So force it into
+ // the element decl pool, marked as being there because it was in
+ // the DOCTYPE. Later, when its declared, the status will be updated.
+ //
+ // Only do this if we are not reusing the validator! If we are reusing,
+ // then look it up instead. It has to exist!
+ MemoryManager* const rootDeclMgr =
+ fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;
+
+ DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
+ (
+ bbRootName.getRawBuffer()
+ , fEmptyNamespaceId
+ , DTDElementDecl::Any
+ , rootDeclMgr
+ );
+
+ Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
+ rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
+ rootDecl->setExternalElemDeclaration(true);
+ if(!fUseCachedGrammar)
+ {
+ fGrammar->putElemDecl(rootDecl);
+ rootDeclJanitor.release();
+ } else
+ {
+ // attach this to the undeclared element pool so that it gets deleted
+ XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
+ if (elemDecl)
+ {
+ rootDecl->setId(elemDecl->getId());
+ }
+ else
+ {
+ rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
+ rootDeclJanitor.release();
+ }
+ }
+
+ // Skip any spaces after the name
+ fReaderMgr.skipPastSpaces();
+
+ // And now if we are looking at a >, then we are done. It is not
+ // required to have an internal or external subset, though why you
+ // would not escapes me.
+ if (fReaderMgr.skippedChar(chCloseAngle)) {
+
+ // If we have a doc type handler and advanced callbacks are enabled,
+ // call the doctype event.
+ if (fDocTypeHandler)
+ fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
+ return;
+ }
+
+ // either internal/external subset
+ if (fValScheme == Val_Auto && !fValidate)
+ fValidate = true;
+
+ bool hasIntSubset = false;
+ bool hasExtSubset = false;
+ XMLCh* sysId = 0;
+ XMLCh* pubId = 0;
+
+ DTDScanner dtdScanner
+ (
+ (DTDGrammar*) fGrammar
+ , fDocTypeHandler
+ , fGrammarPoolMemoryManager
+ , fMemoryManager
+ );
+ dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
+
+ // If the next character is '[' then we have no external subset cause
+ // there is no system id, just the opening character of the internal
+ // subset. Else, has to be an id.
+ //
+ // Just look at the next char, don't eat it.
+ if (fReaderMgr.peekNextChar() == chOpenSquare)
+ {
+ hasIntSubset = true;
+ }
+ else
+ {
+ // Indicate we have an external subset
+ hasExtSubset = true;
+ fHasNoDTD = false;
+
+ // Get buffers for the ids
+ XMLBufBid bbPubId(&fBufMgr);
+ XMLBufBid bbSysId(&fBufMgr);
+
+ // Get the external subset id
+ if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
+ {
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Get copies of the ids we got
+ pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
+ sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
+
+ // Skip spaces and check again for the opening of an internal subset
+ fReaderMgr.skipPastSpaces();
+
+ // Just look at the next char, don't eat it.
+ if (fReaderMgr.peekNextChar() == chOpenSquare) {
+ hasIntSubset = true;
+ }
+ }
+
+ // Insure that the ids get cleaned up, if they got allocated
+ ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
+ ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
+
+ // If we have a doc type handler and advanced callbacks are enabled,
+ // call the doctype event.
+ if (fDocTypeHandler)
+ fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
+
+ // Ok, if we had an internal subset, we are just past the [ character
+ // and need to parse that first.
+ if (hasIntSubset)
+ {
+ // Eat the opening square bracket
+ fReaderMgr.getNextChar();
+
+ checkInternalDTD(hasExtSubset, sysId, pubId);
+
+ // And try to scan the internal subset. If we fail, try to recover
+ // by skipping forward tot he close angle and returning.
+ if (!dtdScanner.scanInternalSubset())
+ {
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Do a sanity check that some expanded PE did not propogate out of
+ // the doctype. This could happen if it was terminated early by bad
+ // syntax.
+ if (fReaderMgr.getReaderDepth() > 1)
+ {
+ emitError(XMLErrs::PEPropogated);
+
+ // Ask the reader manager to pop back down to the main level
+ fReaderMgr.cleanStackBackTo(1);
+ }
+
+ fReaderMgr.skipPastSpaces();
+ }
+
+ // And that should leave us at the closing > of the DOCTYPE line
+ if (!fReaderMgr.skippedChar(chCloseAngle))
+ {
+ // Do a special check for the common scenario of an extra ] char at
+ // the end. This is easy to recover from.
+ if (fReaderMgr.skippedChar(chCloseSquare)
+ && fReaderMgr.skippedChar(chCloseAngle))
+ {
+ emitError(XMLErrs::ExtraCloseSquare);
+ }
+ else
+ {
+ emitError(XMLErrs::UnterminatedDOCTYPE);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ }
+ }
+
+ // If we had an external subset, then we need to deal with that one
+ // next. If we are reusing the validator, then don't scan it.
+ if (hasExtSubset) {
+
+ InputSource* srcUsed=0;
+ Janitor<InputSource> janSrc(srcUsed);
+ // If we had an internal subset and we're using the cached grammar, it
+ // means that the ignoreCachedDTD is set, so we ignore the cached
+ // grammar
+ if (fUseCachedGrammar && !hasIntSubset)
+ {
+ srcUsed = resolveSystemId(sysId, pubId);
+ if (srcUsed) {
+ janSrc.reset(srcUsed);
+ Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());
+
+ if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
+
+ fDTDGrammar = (DTDGrammar*) grammar;
+ fGrammar = fDTDGrammar;
+ fValidator->setGrammar(fGrammar);
+ // If we don't report at least the external subset boundaries,
+ // an advanced document handler cannot know when the DTD end,
+ // since we've already sent a doctype decl that indicates there's
+ // there's an external subset.
+ if (fDocTypeHandler)
+ {
+ fDocTypeHandler->startExtSubset();
+ fDocTypeHandler->endExtSubset();
+ }
+
+ return;
+ }
+ }
+ }
+
+ if (fLoadExternalDTD || fValidate)
+ {
+ // And now create a reader to read this entity
+ XMLReader* reader;
+ if (srcUsed) {
+ reader = fReaderMgr.createReader
+ (
+ *srcUsed
+ , false
+ , XMLReader::RefFrom_NonLiteral
+ , XMLReader::Type_General
+ , XMLReader::Source_External
+ , fCalculateSrcOfs
+ , fLowWaterMark
+ );
+ }
+ else {
+ reader = fReaderMgr.createReader
+ (
+ sysId
+ , pubId
+ , false
+ , XMLReader::RefFrom_NonLiteral
+ , XMLReader::Type_General
+ , XMLReader::Source_External
+ , srcUsed
+ , fCalculateSrcOfs
+ , fLowWaterMark
+ , fDisableDefaultEntityResolution
+ );
+ janSrc.reset(srcUsed);
+ }
+ // If it failed then throw an exception
+ if (!reader)
+ ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);
+
+ if (fToCacheGrammar) {
+
+ unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
+ const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
+
+ fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
+ ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
+ fGrammarResolver->putGrammar(fGrammar);
+ }
+
+ // In order to make the processing work consistently, we have to
+ // make this look like an external entity. So create an entity
+ // decl and fill it in and push it with the reader, as happens
+ // with an external entity. Put a janitor on it to insure it gets
+ // cleaned up. The reader manager does not adopt them.
+ const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
+ DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
+ declDTD->setSystemId(sysId);
+ declDTD->setIsExternal(true);
+
+ // Mark this one as a throw at end
+ reader->setThrowAtEnd(true);
+
+ // And push it onto the stack, with its pseudo name
+ fReaderMgr.pushReader(reader, declDTD);
+
+ // Tell it its not in an include section
+ dtdScanner.scanExtSubsetDecl(false, true);
+ }
+ }
+}
+/*
+ * Scans an element start tag when namespaces are not being processed, so
+ * the QName read from the input is used as the element name as is.
+ * Presumably the caller has already consumed the opening '<' (the first
+ * thing read here is the name) - confirm against the call site.
+ *
+ * In order, the method:
+ *  - reads the name and looks up the element decl in fGrammar, then in
+ *    fDTDElemNonDeclPool, creating a DTDElementDecl in that pool if neither
+ *    has one;
+ *  - pushes a level on fElemStack and, when fValidate is set, validates the
+ *    element and checks a root element's name against fRootElemName;
+ *  - scans the attributes into fAttrList, reporting duplicates;
+ *  - appends Default/Fixed attributes that were not given in the tag;
+ *  - for an empty tag, checks the content (when validating) and pops the
+ *    element stack again;
+ *  - reports the tag through fDocHandler->startElement(), if a handler is set.
+ *
+ * gotData is set to true on entry and to false only when the tag is an
+ * empty root element.
+ *
+ * Returns true once the tag has been processed (recoverable errors are
+ * emitted along the way), false when a syntax error makes the method give up.
+ * Raises UnexpectedEOFException through ThrowXMLwithMemMgr when the next
+ * character is 0 while looking for the end of the tag.
+ *
+ * NOTE(review): the "return false" paths inside the attribute loop run after
+ * fElemStack.addLevel() and do not pop that level or clear
+ * fUndeclaredAttrRegistry here; whether callers compensate is not visible
+ * in this function.
+ */
+bool IGXMLScanner::scanStartTag(bool& gotData)
+{
+ // Assume we will still have data until proven otherwise. It will only
+ // ever be false if this is the root and it is empty.
+ gotData = true;
+
+ // Get the QName. In this case, we are not doing namespaces, so we just
+ // use it as is and don't have to break it into parts.
+ if (!fReaderMgr.getName(fQNameBuf))
+ {
+ emitError(XMLErrs::ExpectedElementName);
+ fReaderMgr.skipToChar(chOpenAngle); // try to resync at the next '<'
+ return false;
+ }
+
+ // Assume it won't be an empty tag
+ bool isEmpty = false;
+
+ // Lets try to look up the element in the validator's element decl pool
+ // We can pass bogus values for the URI id and the base name. We know that
+ // this can only be called if we are doing a DTD style validator and that
+ // he will only look at the QName.
+ //
+ // We tell him to fault in a decl if he does not find one.
+ // Actually, we *don't* tell him to fault in a decl if he does not find one- NG
+ bool wasAdded = false; // becomes true only if we create the decl ourselves below
+ const XMLCh *rawQName = fQNameBuf.getRawBuffer();
+ XMLElementDecl* elemDecl = fGrammar->getElemDecl
+ (
+ fEmptyNamespaceId
+ , 0
+ , rawQName
+ , Grammar::TOP_LEVEL_SCOPE
+ );
+ // look for it in the undeclared pool:
+ if(!elemDecl)
+ {
+ elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
+ }
+ if(!elemDecl)
+ {
+ // we're assuming this must be a DTD element. DTD's can be
+ // used with or without namespaces, but schemas cannot be used without
+ // namespaces.
+ wasAdded = true;
+ elemDecl = new (fMemoryManager) DTDElementDecl
+ (
+ rawQName
+ , fEmptyNamespaceId
+ , DTDElementDecl::Any
+ , fMemoryManager
+ );
+ elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl)); // the value returned by put() becomes the decl's id
+ }
+
+ // We do something different here according to whether we found the
+ // element or not.
+ if (wasAdded)
+ {
+ // If validating then emit an error
+ if (fValidate)
+ {
+ // This is to tell the reuse Validator that this element was
+ // faulted-in, was not an element in the validator pool originally
+ elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
+
+ fValidator->emitError
+ (
+ XMLValid::ElementNotDefined
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else
+ {
+ // If it's not marked declared and validating, then emit an error
+ if (fValidate && !elemDecl->isDeclared())
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotDefined
+ , elemDecl->getFullName()
+ );
+ }
+ }
+
+ // See if it's the root element (must be sampled before addLevel() below)
+ const bool isRoot = fElemStack.isEmpty();
+
+ // Expand the element stack and add the new element
+ fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
+ fElemStack.setValidationFlag(fValidate);
+
+ // Validate the element
+ if (fValidate)
+ fValidator->validateElement(elemDecl);
+
+ // If this is the first element and we are validating, check the root
+ // element.
+ if (isRoot)
+ {
+ fRootGrammar = fGrammar;
+
+ if (fValidate)
+ {
+ // If a DocType exists, then check if it matches the root name there.
+ if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
+ fValidator->emitError(XMLValid::RootElemNotLikeDocType);
+ }
+ }
+ else
+ {
+ // If the element stack is not empty, then add this element as a
+ // child of the previous top element. If it's empty, this is the root
+ // elem and is not the child of anything.
+ fElemStack.addChild(elemDecl->getElementName(), true);
+ }
+
+ // Skip any whitespace after the name
+ fReaderMgr.skipPastSpaces();
+
+ // We loop until we either see a /> or >, handling attribute/value
+ // pairs until we get there.
+ XMLSize_t attCount = 0; // number of fAttrList entries filled in for this tag
+ XMLSize_t curAttListSize = fAttrList->size(); // XMLAttr objects already in fAttrList that can be reused
+ wasAdded = false; // NOTE(review): not read again in this function after this reset
+
+ fElemCount++; // stamp for this element; compared with the counters kept in fAttDefRegistry below
+
+ while (true)
+ {
+ // And get the next non-space character
+ XMLCh nextCh = fReaderMgr.peekNextChar();
+
+ // If the next character is not a slash or closed angle bracket,
+ // then it must be whitespace, since whitespace is required
+ // between the end of the last attribute and the name of the next
+ // one.
+ if (attCount)
+ {
+ if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
+ {
+ bool bFoundSpace;
+ fReaderMgr.skipPastSpaces(bFoundSpace);
+ if (!bFoundSpace)
+ {
+ // Emit the error but keep on going
+ emitError(XMLErrs::ExpectedWhitespace);
+ }
+ // Ok, peek another char
+ nextCh = fReaderMgr.peekNextChar();
+ }
+ }
+
+ // Ok, here we first check for any of the special case characters.
+ // If it's not one, then we do the normal case processing, which
+ // assumes that we've hit an attribute value. Otherwise, we do all
+ // the special case checks.
+ if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
+ {
+ // Assume it's going to be an attribute, so get a name from
+ // the input.
+ if (!fReaderMgr.getName(fAttNameBuf))
+ {
+ emitError(XMLErrs::ExpectedAttrName);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return false;
+ }
+
+ // And next must be an equal sign
+ if (!scanEq())
+ {
+ static const XMLCh tmpList[] =
+ {
+ chSingleQuote, chDoubleQuote, chCloseAngle
+ , chOpenAngle, chForwardSlash, chNull
+ };
+
+ emitError(XMLErrs::ExpectedEqSign);
+
+ // Try to sync back up by skipping forward until we
+ // hit something meaningful.
+ const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
+
+ if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
+ {
+ // Jump back to top for normal processing of these
+ continue;
+ }
+ else if ((chFound == chSingleQuote)
+ || (chFound == chDoubleQuote)
+ || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
+ {
+ // Just fall through assuming that the value is to follow
+ }
+ else if (chFound == chOpenAngle)
+ {
+ // Assume a malformed tag and that new one is starting
+ emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
+ return false;
+ }
+ else
+ {
+ // Something went really wrong
+ return false;
+ }
+ }
+ // See if this attribute is declared for this element. If we are
+ // not validating of course it will not be at first, but we will
+ // fault it into the pool (to avoid lots of redundant errors.)
+ XMLCh * namePtr = fAttNameBuf.getRawBuffer();
+ XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
+
+ // Add this attribute to the attribute list that we use to
+ // pass them to the handler. We reuse its existing elements
+ // but expand it as required.
+ // Note that we want to do this first since this will
+ // make a copy of the namePtr; we can then make use of
+ // that copy in the hashtable lookup that checks
+ // for duplicates. This will mean we may have to update
+ // the type of the XMLAttr later.
+ XMLAttr* curAtt;
+ if (attCount >= curAttListSize) // no reusable XMLAttr left in fAttrList: allocate a new one
+ {
+ curAtt = new (fMemoryManager) XMLAttr
+ (
+ 0
+ , namePtr
+ , XMLUni::fgZeroLenString
+ , XMLUni::fgZeroLenString
+ , (attDef)?attDef->getType():XMLAttDef::CData // attributes without an att def get type CData
+ , true // presumably the "specified" flag (the reuse branch calls setSpecified(true)) - confirm
+ , fMemoryManager
+ );
+ fAttrList->addElement(curAtt);
+ }
+ else
+ {
+ curAtt = fAttrList->elementAt(attCount);
+ curAtt->set
+ (
+ 0
+ , namePtr
+ , XMLUni::fgZeroLenString
+ , XMLUni::fgZeroLenString
+ , (attDef)?attDef->getType():XMLAttDef::CData // attributes without an att def get type CData
+ );
+ curAtt->setSpecified(true);
+ }
+ // reset namePtr so it refers to newly-allocated memory
+ namePtr = (XMLCh *)curAtt->getName(); // cast drops the const of the name returned by the XMLAttr
+
+ if (!attDef)
+ {
+ // If there is a validation handler, then we are validating
+ // so emit an error.
+ if (fValidate)
+ {
+ fValidator->emitError
+ (
+ XMLValid::AttNotDefinedForElement
+ , fAttNameBuf.getRawBuffer()
+ , elemDecl->getFullName()
+ );
+ }
+ if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0)) // a false result is reported as a repeated attribute name
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , namePtr
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else
+ {
+ // Duplicate detection for declared attributes: fAttDefRegistry keeps, per att def, the fElemCount of the element that last used it
+ unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr(); // first use of this att def: register it with the current stamp
+ *curCountPtr = fElemCount;
+ fAttDefRegistry->put(attDef, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount) // last used by an earlier element: restamp for this one
+ *curCountPtr = fElemCount;
+ else // stamp is not older than this element: the attribute was already given in this tag
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , attDef->getFullName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+
+ // Skip any whitespace before the value and then scan the att
+ // value. This will come back normalized with entity refs and
+ // char refs expanded.
+ fReaderMgr.skipPastSpaces();
+ if (!scanAttValue(attDef, namePtr, fAttValueBuf))
+ {
+ static const XMLCh tmpList[] =
+ {
+ chCloseAngle, chOpenAngle, chForwardSlash, chNull
+ };
+
+ emitError(XMLErrs::ExpectedAttrValue);
+
+ // It failed, so lets try to get synced back up. We skip
+ // forward until we find some whitespace or one of the
+ // chars in our list.
+ const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
+
+ if ((chFound == chCloseAngle)
+ || (chFound == chForwardSlash)
+ || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
+ {
+ // Just fall through and process this attribute, though
+ // the value will be "".
+ }
+ else if (chFound == chOpenAngle)
+ {
+ // Assume a malformed tag and that new one is starting
+ emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
+ return false;
+ }
+ else
+ {
+ // Something went really wrong
+ return false;
+ }
+ }
+ // must set the newly-minted value on the XMLAttr:
+ curAtt->setValue(fAttValueBuf.getRawBuffer());
+
+ // Now that it's all stretched out, lets look at its type and
+ // determine if it has a valid value. It will output any needed
+ // errors, but we just keep going. We only need to do this if
+ // we are validating.
+ if (attDef)
+ {
+ // Let the validator pass judgement on the attribute value
+ if (fValidate)
+ {
+ fValidator->validateAttrValue
+ (
+ attDef
+ , fAttValueBuf.getRawBuffer()
+ , false
+ , elemDecl
+ );
+ }
+ }
+
+ attCount++; // this fAttrList slot is now in use
+ // And jump back to the top of the loop
+ continue;
+ }
+
+ // It was some special case character so do all of the checks and
+ // deal with it.
+ if (!nextCh)
+ ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
+
+ if (nextCh == chForwardSlash)
+ {
+ fReaderMgr.getNextChar();
+ isEmpty = true; // "/" seen: treat as an empty tag even if the ">" turns out to be missing
+ if (!fReaderMgr.skippedChar(chCloseAngle))
+ emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
+ break;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ fReaderMgr.getNextChar();
+ break;
+ }
+ else if (nextCh == chOpenAngle)
+ {
+ // Check for this one specially, since it's going to be common
+ // and it is kind of auto-recovering since we've already hit the
+ // next open bracket, which is what we would have seeked to (and
+ // skipped this whole tag.)
+ emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
+ break;
+ }
+ else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
+ {
+ // Check for this one specially, which is probably a missing
+ // attribute name, e.g. ="value". Just issue expected name
+ // error and eat the quoted string, then jump back to the
+ // top again.
+ emitError(XMLErrs::ExpectedAttrName);
+ fReaderMgr.getNextChar();
+ fReaderMgr.skipQuotedString(nextCh);
+ fReaderMgr.skipPastSpaces();
+ continue;
+ } // NOTE(review): any other "special" char would loop again without consuming input; presumably isSpecialStartTagChar() reports only the chars handled above - confirm
+ }
+
+ if(attCount)
+ {
+ // clean up after ourselves:
+ // clear the map used to detect duplicate attributes
+ fUndeclaredAttrRegistry->removeAll();
+ }
+
+ // Ok, so lets get an enumerator for the attributes of this element
+ // and run through them for well formedness and validity checks. But
+ // make sure that we had any attributes before we do it, since the list
+ // would have gotten faulted in anyway.
+ if (elemDecl->hasAttDefs())
+ {
+ // N.B.: this assumes DTD validation.
+ XMLAttDefList& attDefList = elemDecl->getAttDefList();
+ for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
+ {
+ // Get the current att def, for convenience and its def type
+ const XMLAttDef& curDef = attDefList.getAttDef(i);
+ const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
+
+ unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
+ if (!attCountPtr || *attCountPtr < fElemCount) // never stamped, or stamped by an earlier element
+ { // did not occur
+ if (fValidate)
+ {
+ // If we are validating and it's required, then an error
+ if (defType == XMLAttDef::Required)
+ {
+ fValidator->emitError
+ (
+ XMLValid::RequiredAttrNotProvided
+ , curDef.getFullName()
+ );
+ }
+ else if ((defType == XMLAttDef::Default) ||
+ (defType == XMLAttDef::Fixed) )
+ {
+ if (fStandalone && curDef.isExternal())
+ {
+ // XML 1.0 Section 2.9
+ // Document is standalone, so attributes must not be defaulted.
+ fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
+
+ }
+ }
+ }
+
+ // Fault in the value if needed, and bump the att count
+ if ((defType == XMLAttDef::Default)
+ || (defType == XMLAttDef::Fixed))
+ {
+ // Let the validator pass judgement on the attribute value
+ if (fValidate)
+ {
+ fValidator->validateAttrValue
+ (
+ &curDef
+ , curDef.getValue()
+ , false
+ , elemDecl
+ );
+ }
+
+ XMLAttr* curAtt;
+ if (attCount >= curAttListSize) // no reusable XMLAttr left in fAttrList: allocate a new one
+ {
+ curAtt = new (fMemoryManager) XMLAttr
+ (
+ 0
+ , curDef.getFullName()
+ , XMLUni::fgZeroLenString
+ , curDef.getValue()
+ , curDef.getType()
+ , false // presumably the "specified" flag (the reuse branch calls setSpecified(false)) - confirm
+ , fMemoryManager
+ );
+ fAttrList->addElement(curAtt);
+ curAttListSize++; // keep the cached size in step with fAttrList
+ }
+ else
+ {
+ curAtt = fAttrList->elementAt(attCount);
+ curAtt->set
+ (
+ 0
+ , curDef.getFullName()
+ , XMLUni::fgZeroLenString
+ , curDef.getValue()
+ , curDef.getType()
+ );
+ curAtt->setSpecified(false);
+ }
+ attCount++; // defaulted attributes are counted with the scanned ones
+ }
+ }
+ }
+ }
+
+ // If empty, validate content right now if we are validating and then
+ // pop the element stack top. Else, we have to update the current stack
+ // top's namespace mapping elements.
+ if (isEmpty)
+ {
+ // If validating, then ensure that it's legal to have no content
+ if (fValidate)
+ {
+ XMLSize_t failure; // filled through the pointer passed below; not read afterwards in this function
+ bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
+ if (!res)
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotValidForContent
+ , elemDecl->getFullName()
+ , elemDecl->getFormattedContentModel()
+ );
+ }
+ }
+
+ // Pop the element stack back off since it'll never be used now
+ fElemStack.popTop();
+
+ // If this was the root, then it was an empty root
+ if (isRoot)
+ gotData = false;
+ else {
+ // Restore the validation flag (read from the stack after the pop above)
+ fValidate = fElemStack.getValidationFlag();
+ }
+ }
+
+ // If we have a document handler, then tell it about this start tag. We
+ // don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
+ // any prefix since it's just one big name if we are not doing namespaces.
+ if (fDocHandler)
+ {
+ fDocHandler->startElement
+ (
+ *elemDecl
+ , fEmptyNamespaceId
+ , 0
+ , *fAttrList
+ , attCount // only the first attCount entries of fAttrList were filled in for this tag
+ , isEmpty
+ , isRoot
+ );
+ }
+
+ return true;
+}
+
+
+// This method is called to scan a start tag when we are processing
+// namespaces. There are two different versions of this method, one for
+// namespace aware processing and one for non-namespace aware processing.
+//
+// This method is called after we've scanned the < of a start tag. So we
+// have to get the element name, then scan the attributes, after which
+// we are either going to see >, />, or attributes followed by one of those
+// sequences.
+bool IGXMLScanner::scanStartTagNS(bool& gotData)
+{
+ // Assume we will still have data until proven otherwise. It will only
+ // ever be false if this is the root and its empty.
+ gotData = true;
+
+ // Reset element content buffer
+ fContent.reset();
+
+ // The current position is after the open bracket, so we need to read in
+ // in the element name.
+ int prefixColonPos;
+ if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
+ {
+ if (fQNameBuf.isEmpty())
+ emitError(XMLErrs::ExpectedElementName);
+ else
+ emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
+ fReaderMgr.skipToChar(chOpenAngle);
+ return false;
+ }
+
+ // See if its the root element
+ const bool isRoot = fElemStack.isEmpty();
+
+ // Skip any whitespace after the name
+ fReaderMgr.skipPastSpaces();
+
+ // First we have to do the rawest attribute scan. We don't do any
+ // normalization of them at all, since we don't know yet what type they
+ // might be (since we need the element decl in order to do that.)
+ bool isEmpty;
+ XMLSize_t attCount = rawAttrScan
+ (
+ fQNameBuf.getRawBuffer()
+ , *fRawAttrList
+ , isEmpty
+ );
+
+ // save the contentleafname and currentscope before addlevel, for later use
+ ContentLeafNameTypeVector* cv = 0;
+ XMLContentModel* cm = 0;
+ unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
+ bool laxThisOne = false;
+
+ if (!isRoot && fGrammarType == Grammar::SchemaGrammarType)
+ {
+ // schema validator will have correct type if validating
+ SchemaElementDecl* tempElement = (SchemaElementDecl*)
+ fElemStack.topElement()->fThisElement;
+ SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
+ ComplexTypeInfo *currType = 0;
+
+ if (fValidate)
+ {
+ currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
+ if (currType)
+ modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
+ else // something must have gone wrong
+ modelType = SchemaElementDecl::Any;
+ }
+ else
+ {
+ currType = tempElement->getComplexTypeInfo();
+ }
+
+ if ((modelType == SchemaElementDecl::Mixed_Simple)
+ || (modelType == SchemaElementDecl::Mixed_Complex)
+ || (modelType == SchemaElementDecl::Children))
+ {
+ cm = currType->getContentModel();
+ cv = cm->getContentLeafNameTypeVector();
+ currentScope = fElemStack.getCurrentScope();
+ }
+ else if (modelType == SchemaElementDecl::Any) {
+ laxThisOne = true;
+ }
+ }
+
+ // Now, since we might have to update the namespace map for this element,
+ // but we don't have the element decl yet, we just tell the element stack
+ // to expand up to get ready.
+ XMLSize_t elemDepth = fElemStack.addLevel();
+ fElemStack.setValidationFlag(fValidate);
+ fElemStack.setPrefixColonPos(prefixColonPos);
+
+ // Check if there is any external schema location specified, and if we are at root,
+ // go through them first before scanning those specified in the instance document
+ if (isRoot && fDoSchema
+ && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
+
+ if (fExternalSchemaLocation)
+ parseSchemaLocation(fExternalSchemaLocation, true);
+ if (fExternalNoNamespaceSchemaLocation)
+ resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
+ }
+
+ // Make an initial pass through the list and find any xmlns attributes or
+ // schema attributes.
+ if (attCount) {
+ scanRawAttrListforNameSpaces(attCount);
+ }
+
+ // Also find any default or fixed xmlns attributes in DTD defined for
+ // this element.
+ XMLElementDecl* elemDecl = 0;
+ const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
+
+ if (fGrammarType == Grammar::DTDGrammarType) {
+
+ if (!fSkipDTDValidation) {
+ elemDecl = fGrammar->getElemDecl(
+ fEmptyNamespaceId, 0, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
+ );
+
+ if (elemDecl) {
+ if (elemDecl->hasAttDefs()) {
+ XMLAttDefList& attDefList = elemDecl->getAttDefList();
+ for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
+ {
+ // Get the current att def, for convenience and its def type
+ const XMLAttDef& curDef = attDefList.getAttDef(i);
+ const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
+
+ // update the NSMap if there are any default/fixed xmlns attributes
+ if ((defType == XMLAttDef::Default)
+ || (defType == XMLAttDef::Fixed))
+ {
+ const XMLCh* rawPtr = curDef.getFullName();
+ if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
+ || XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
+ updateNSMap(rawPtr, curDef.getValue());
+ }
+ }
+ }
+ }
+ }
+
+ if (!elemDecl) {
+ elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
+ }
+ }
+
+ // Resolve the qualified name to a URI and name so that we can look up
+ // the element decl for this element. We have now update the prefix to
+ // namespace map so we should get the correct element now.
+ unsigned int uriId = resolveQNameWithColon(
+ qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos
+ );
+
+ //if schema, check if we should lax or skip the validation of this element
+ bool parentValidation = fValidate;
+ if (cv) {
+ QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
+ // elementDepth will be > 0, as cv is only constructed if element is not
+ // root.
+ laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
+ }
+
+ // Look up the element now in the grammar. This will get us back a
+ // generic element decl object. We tell him to fault one in if he does
+ // not find it.
+ bool wasAdded = false;
+ const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
+
+ if (fDoSchema) {
+
+ if (fGrammarType == Grammar::DTDGrammarType) {
+ if (!switchGrammar(getURIText(uriId))) {
+ fValidator->emitError(
+ XMLValid::GrammarNotFound, getURIText(uriId)
+ );
+ }
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+ elemDecl = fGrammar->getElemDecl(
+ uriId, nameRawBuf, qnameRawBuf, currentScope
+ );
+
+ // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
+ if (!elemDecl) {
+ bool checkTopLevel = (currentScope != Grammar::TOP_LEVEL_SCOPE);
+ const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
+ unsigned int orgGrammarUri = fURIStringPool->getId(original_uriStr);
+
+ if (orgGrammarUri != uriId) {
+ if (switchGrammar(getURIText(uriId))) {
+ checkTopLevel = true;
+ }
+ else {
+ // the laxElementValidation routine (called above) will
+ // set fValidate to false for a "skipped" element
+ if (!laxThisOne && fValidate) {
+ fValidator->emitError(
+ XMLValid::GrammarNotFound, getURIText(uriId)
+ );
+ }
+ checkTopLevel = false;
+ }
+ }
+
+ if (checkTopLevel) {
+ elemDecl = fGrammar->getElemDecl(
+ uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
+ );
+ }
+
+ if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
+
+ if (orgGrammarUri == uriId) {
+ // still not found in specified uri
+ // try emptyNamespace see if element should be
+ // un-qualified.
+ // Use a temp variable until we decide this is the case
+ if (uriId != fEmptyNamespaceId) {
+ XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
+ fEmptyNamespaceId, nameRawBuf, qnameRawBuf, currentScope
+ );
+
+ if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
+ fValidator->emitError(
+ XMLValid::ElementNotUnQualified, qnameRawBuf
+ );
+ elemDecl = tempElemDecl;
+ }
+ }
+ }
+ // still Not found in specified uri
+ // go to original Grammar again to see if element needs
+ // to be fully qualified.
+ // Use a temp variable until we decide this is the case
+ else if (uriId == fEmptyNamespaceId) {
+
+ if (switchGrammar(original_uriStr)) {
+ XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
+ orgGrammarUri, nameRawBuf, qnameRawBuf, currentScope
+ );
+ if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
+ fValidator->emitError(
+ XMLValid::ElementNotQualified, qnameRawBuf
+ );
+ elemDecl = tempElemDecl;
+ }
+ }
+ else if (!laxThisOne && fValidate) {
+ fValidator->emitError(
+ XMLValid::GrammarNotFound,original_uriStr
+ );
+ }
+ }
+ }
+
+ if (!elemDecl) {
+ // still not found
+ // switch back to original grammar first if necessary
+ if (orgGrammarUri != uriId) {
+ switchGrammar(original_uriStr);
+ }
+
+ // look in the list of undeclared elements, as would have been
+ // done before we made grammars stateless:
+ elemDecl = fSchemaElemNonDeclPool->getByKey(
+ nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE
+ );
+ }
+ }
+ }
+ }
+
+ if (!elemDecl) {
+
+ if (fGrammarType == Grammar::DTDGrammarType) {
+ elemDecl = new (fMemoryManager) DTDElementDecl(
+ qnameRawBuf, uriId, DTDElementDecl::Any, fMemoryManager
+ );
+ elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
+ }
+ else if (fGrammarType == Grammar::SchemaGrammarType) {
+ elemDecl = new (fMemoryManager) SchemaElementDecl(
+ fPrefixBuf.getRawBuffer(), nameRawBuf, uriId
+ , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE
+ , fMemoryManager
+ );
+ elemDecl->setId(
+ fSchemaElemNonDeclPool->put((void*)elemDecl->getBaseName()
+ , uriId, (int)Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl)
+ );
+ } else {
+ fValidator->emitError(
+ XMLValid::GrammarNotFound, getURIText(uriId)
+ );
+ }
+ wasAdded = true;
+ }
+
+ // this info needed for DOMTypeInfo
+ fPSVIElemContext.fErrorOccurred = false;
+
+ // We do something different here according to whether we found the
+ // element or not.
+ bool bXsiTypeSet= (fValidator && fGrammarType == Grammar::SchemaGrammarType)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
+ if (wasAdded)
+ {
+ if (laxThisOne && !bXsiTypeSet) {
+ fValidate = false;
+ fElemStack.setValidationFlag(fValidate);
+ }
+ else if (fValidate)
+ {
+ // If validating then emit an error
+
+ // This is to tell the reuse Validator that this element was
+ // faulted-in, was not an element in the grammar pool originally
+ elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
+
+ // xsi:type was specified, don't complain about missing definition
+ if(!bXsiTypeSet)
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotDefined
+ , elemDecl->getFullName()
+ );
+
+ if(fGrammarType == Grammar::SchemaGrammarType)
+ {
+ fPSVIElemContext.fErrorOccurred = true;
+ }
+ }
+ }
+ }
+ else
+ {
+ // If its not marked declared and validating, then emit an error
+ if (!elemDecl->isDeclared()) {
+ if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
+ if(!bXsiTypeSet && fGrammarType == Grammar::SchemaGrammarType) {
+ fPSVIElemContext.fErrorOccurred = true;
+ }
+ }
+
+ if (laxThisOne) {
+ fValidate = false;
+ fElemStack.setValidationFlag(fValidate);
+ }
+ else if (fValidate && !bXsiTypeSet)
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotDefined
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ }
+
+ // Now we can update the element stack to set the current element
+ // decl. We expanded the stack above, but couldn't store the element
+ // decl because we didn't know it yet.
+ fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
+ fElemStack.setCurrentURI(uriId);
+
+ if (isRoot)
+ {
+ fRootGrammar = fGrammar;
+ if (fGrammarType == Grammar::SchemaGrammarType && !fRootElemName)
+ fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType && fPSVIHandler)
+ {
+
+ fPSVIElemContext.fElemDepth++;
+ if (elemDecl->isDeclared())
+ {
+ fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
+ }
+ else
+ {
+ fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
+
+ /******
+ * While we report an error for historical reasons, this should
+ * actually result in lax assessment - NG.
+ if (isRoot && fValidate)
+ fPSVIElemContext.fErrorOccurred = true;
+ *****/
+ }
+ }
+
+ // Validate the element
+ if (fValidate)
+ {
+ fValidator->validateElement(elemDecl);
+ if (fValidator->handlesSchema())
+ {
+ if (((SchemaValidator*) fValidator)->getErrorOccurred())
+ fPSVIElemContext.fErrorOccurred = true;
+ }
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+
+ // squirrel away the element's QName, so that we can do an efficient
+ // end-tag match
+ fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
+
+ ComplexTypeInfo* typeinfo = (fValidate)
+ ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
+ : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
+
+ if (typeinfo) {
+ currentScope = typeinfo->getScopeDefined();
+
+ // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
+ XMLCh* typeName = typeinfo->getTypeName();
+ const int comma = XMLString::indexOf(typeName, chComma);
+ if (comma > 0) {
+ XMLBuffer prefixBuf(comma+1, fMemoryManager);
+ prefixBuf.append(typeName, comma);
+ const XMLCh* uriStr = prefixBuf.getRawBuffer();
+
+ bool errorCondition = !switchGrammar(uriStr) && fValidate;
+ if (errorCondition && !laxThisOne)
+ {
+ fValidator->emitError
+ (
+ XMLValid::GrammarNotFound
+ , prefixBuf.getRawBuffer()
+ );
+ }
+ }
+ else if (comma == 0) {
+ bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
+ if (errorCondition && !laxThisOne)
+ {
+ fValidator->emitError
+ (
+ XMLValid::GrammarNotFound
+ , XMLUni::fgZeroLenString
+ );
+ }
+ }
+ }
+ fElemStack.setCurrentScope(currentScope);
+
+ // Set element next state
+ if (elemDepth >= fElemStateSize) {
+ resizeElemState();
+ }
+
+ fElemState[elemDepth] = 0;
+ fElemLoopState[elemDepth] = 0;
+ }
+
+ fElemStack.setCurrentGrammar(fGrammar);
+
+ // If this is the first element and we are validating, check the root
+ // element.
+ if (isRoot)
+ {
+ if (fValidate)
+ {
+ // If a DocType exists, then check if it matches the root name there.
+ if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
+ fValidator->emitError(XMLValid::RootElemNotLikeDocType);
+ }
+ }
+ else if (parentValidation)
+ {
+ // If the element stack is not empty, then add this element as a
+ // child of the previous top element. If its empty, this is the root
+ // elem and is not the child of anything.
+ fElemStack.addChild(elemDecl->getElementName(), true);
+ }
+
+ // PSVI handling: even if it turns out there are
+ // no attributes, we need to reset this list...
+ if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType )
+ fPSVIAttrList->reset();
+
+ // Now lets get the fAttrList filled in. This involves faulting in any
+ // defaulted and fixed attributes and normalizing the values of any that
+ // we got explicitly.
+ //
+ // We update the attCount value with the total number of attributes, but
+ // it goes in with the number of values we got during the raw scan of
+ // explictly provided attrs above.
+ attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
+ if(attCount)
+ {
+ // clean up after ourselves:
+ // clear the map used to detect duplicate attributes
+ fUndeclaredAttrRegistry->removeAll();
+ }
+
+ // activate identity constraints
+ if (fGrammar &&
+ fGrammarType == Grammar::SchemaGrammarType &&
+ toCheckIdentityConstraint())
+ {
+ fICHandler->activateIdentityConstraint
+ (
+ (SchemaElementDecl*) elemDecl
+ , (int) elemDepth
+ , uriId
+ , fPrefixBuf.getRawBuffer()
+ , *fAttrList
+ , attCount
+ , fValidationContext
+ );
+ }
+
+ // Since the element may have default values, call start tag now regardless if it is empty or not
+ // If we have a document handler, then tell it about this start tag
+ if (fDocHandler)
+ {
+ fDocHandler->startElement
+ (
+ *elemDecl
+ , uriId
+ , fPrefixBuf.getRawBuffer()
+ , *fAttrList
+ , attCount
+ , false
+ , isRoot
+ );
+ }
+
+ // if we have a PSVIHandler, now's the time to call
+ // its handleAttributesPSVI method:
+ if(fPSVIHandler && fGrammarType == Grammar::SchemaGrammarType)
+ {
+ QName *eName = elemDecl->getElementName();
+ fPSVIHandler->handleAttributesPSVI
+ (
+ eName->getLocalPart()
+ , fURIStringPool->getValueForId(eName->getURI())
+ , fPSVIAttrList
+ );
+ }
+
+ // If empty, validate content right now if we are validating and then
+ // pop the element stack top. Else, we have to update the current stack
+ // top's namespace mapping elements.
+ if (isEmpty)
+ {
+ // Pop the element stack back off since it'll never be used now
+ fElemStack.popTop();
+
+ // reset current type info
+ DatatypeValidator* psviMemberType = 0;
+ if (fGrammarType == Grammar::SchemaGrammarType)
+ {
+ if (fValidate && elemDecl->isDeclared())
+ {
+ fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
+ if(!fPSVIElemContext.fCurrentTypeInfo)
+ fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
+ else
+ fPSVIElemContext.fCurrentDV = 0;
+ if(fPSVIHandler)
+ {
+ fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
+
+ if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
+ fPSVIElemContext.fNormalizedValue = 0;
+ }
+ }
+ else
+ {
+ fPSVIElemContext.fCurrentDV = 0;
+ fPSVIElemContext.fCurrentTypeInfo = 0;
+ fPSVIElemContext.fNormalizedValue = 0;
+ }
+ }
+
+ // If validating, then insure that its legal to have no content
+ if (fValidate)
+ {
+ XMLSize_t failure;
+ bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
+ if (!res)
+ {
+ fValidator->emitError
+ (
+ XMLValid::ElementNotValidForContent
+ , elemDecl->getFullName()
+ , elemDecl->getFormattedContentModel()
+ );
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType) {
+
+ if (((SchemaValidator*) fValidator)->getErrorOccurred())
+ {
+ fPSVIElemContext.fErrorOccurred = true;
+ }
+ else
+ {
+ if (fPSVIHandler)
+ {
+ fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
+ if(fPSVIElemContext.fIsSpecified)
+ fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
+ }
+ // note that if we're empty, won't be a current DV
+ if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
+ psviMemberType = fValidationContext->getValidatingMemberType();
+ }
+
+ // call matchers and de-activate context
+ if (toCheckIdentityConstraint())
+ {
+ fICHandler->deactivateContext
+ (
+ (SchemaElementDecl *) elemDecl
+ , fContent.getRawBuffer()
+ , fValidationContext
+ , fPSVIElemContext.fCurrentDV
+ );
+ }
+
+ }
+ }
+ else if (fGrammarType == Grammar::SchemaGrammarType) {
+ ((SchemaValidator*)fValidator)->resetNillable();
+ }
+
+ if (fGrammarType == Grammar::SchemaGrammarType)
+ {
+ if (fPSVIHandler)
+ {
+ endElementPSVI((SchemaElementDecl*)elemDecl, psviMemberType);
+ }
+ }
+
+ // If we have a doc handler, tell it about the end tag
+ if (fDocHandler)
+ {
+ fDocHandler->endElement
+ (
+ *elemDecl
+ , uriId
+ , isRoot
+ , fPrefixBuf.getRawBuffer()
+ );
+ }
+
+ // If the elem stack is empty, then it was an empty root
+ if (isRoot)
+ gotData = false;
+ else
+ {
+ // Restore the grammar
+ fGrammar = fElemStack.getCurrentGrammar();
+ fGrammarType = fGrammar->getGrammarType();
+ if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
+ if (fValidatorFromUser)
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
+ else {
+ fValidator = fSchemaValidator;
+ }
+ }
+ else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
+ if (fValidatorFromUser)
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
+ else {
+ fValidator = fDTDValidator;
+ }
+ }
+
+ fValidator->setGrammar(fGrammar);
+
+ // Restore the validation flag
+ fValidate = fElemStack.getValidationFlag();
+ }
+ }
+ else if (fGrammarType == Grammar::SchemaGrammarType)
+ {
+ // send a partial element psvi
+ if (fPSVIHandler)
+ {
+
+ ComplexTypeInfo* curTypeInfo = 0;
+ DatatypeValidator* curDV = 0;
+ XSTypeDefinition* typeDef = 0;
+
+ if (fValidate && elemDecl->isDeclared())
+ {
+ curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
+
+ if (curTypeInfo)
+ {
+ typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
+ }
+ else
+ {
+ curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
+
+ if (curDV)
+ {
+ typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
+ }
+ }
+ }
+
+ fPSVIElement->reset
+ (
+ PSVIElement::VALIDITY_NOTKNOWN
+ , PSVIElement::VALIDATION_NONE
+ , fRootElemName
+ , ((SchemaValidator*) fValidator)->getIsElemSpecified()
+ , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
+ , typeDef
+ , 0 //memberType
+ , fModel
+ , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
+ , 0
+ , 0
+ , 0
+ );
+
+
+ fPSVIHandler->handlePartialElementPSVI
+ (
+ elemDecl->getBaseName()
+ , fURIStringPool->getValueForId(elemDecl->getURI())
+ , fPSVIElement
+ );
+
+ }
+
+ // not empty
+ fErrorStack->push(fPSVIElemContext.fErrorOccurred);
+ }
+
+ return true;
+}
+
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Helper methods
+// ---------------------------------------------------------------------------
+void IGXMLScanner::resizeElemState() {
+
+    // Grow fElemState and fElemLoopState together to twice fElemStateSize.
+    const unsigned int grownSize = fElemStateSize * 2;
+
+    unsigned int* const grownState = (unsigned int*) fMemoryManager->allocate
+    (
+        grownSize * sizeof(unsigned int)
+    );
+    unsigned int* const grownLoopState = (unsigned int*) fMemoryManager->allocate
+    (
+        grownSize * sizeof(unsigned int)
+    );
+
+    // First carry over every slot that existed before the resize ...
+    unsigned int slot = 0;
+    while (slot < fElemStateSize)
+    {
+        grownState[slot] = fElemState[slot];
+        grownLoopState[slot] = fElemLoopState[slot];
+        ++slot;
+    }
+
+    // ... then zero every slot that was added by the resize.
+    while (slot < grownSize)
+    {
+        grownState[slot] = 0;
+        grownLoopState[slot] = 0;
+        ++slot;
+    }
+
+    // Hand the old storage back to the memory manager and switch the
+    // members over to the enlarged arrays.
+    fMemoryManager->deallocate(fElemState);
+    fMemoryManager->deallocate(fElemLoopState);
+    fElemState = grownState;
+    fElemLoopState = grownLoopState;
+    fElemStateSize = grownSize;
+}
+
+void IGXMLScanner::resizeRawAttrColonList() {
+
+    // Grow fRawAttrColonList to twice fRawAttrColonListSize.
+    const unsigned int grownSize = fRawAttrColonListSize * 2;
+    int* const grownList = (int*) fMemoryManager->allocate
+    (
+        grownSize * sizeof(int)
+    );
+
+    // Carry over the existing entries; the added tail is left exactly as
+    // the memory manager returned it.
+    unsigned int slot = 0;
+    while (slot < fRawAttrColonListSize)
+    {
+        grownList[slot] = fRawAttrColonList[slot];
+        ++slot;
+    }
+
+    // Hand the old list back to the memory manager and publish the new one.
+    fMemoryManager->deallocate(fRawAttrColonList);
+    fRawAttrColonList = grownList;
+    fRawAttrColonListSize = grownSize;
+}
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Grammar preparsing
+// ---------------------------------------------------------------------------
+Grammar* IGXMLScanner::loadGrammar(const InputSource& src // source the grammar is read from; handed to the loader unchanged
+ , const short grammarType // Grammar::SchemaGrammarType or Grammar::DTDGrammarType; any other value loads nothing
+ , const bool toCache) // handed to useCachedGrammarInParse() and to the loader
+{
+ Grammar* loadedGrammar = 0; // return value; stays 0 unless one of the loaders below returns normally
+ /* Preparses a grammar from src by dispatching on grammarType to
+  * loadXMLSchemaGrammar() or loadDTDGrammar(), and returns whatever the
+  * loader returned. Exceptions of type XMLErrs::Codes and XMLValid::Codes
+  * are swallowed, an XMLException is reported through emitError(), and an
+  * OutOfMemoryException is rethrown to the caller.
+  *
+  * resetReaderMgr is built from fReaderMgr and ReaderMgr::reset; presumably
+  * it invokes that reset when it goes out of scope unless release() was
+  * called first - confirm against ReaderMgrResetType.
+  */
+ ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
+
+ try
+ {
+ fGrammarResolver->cacheGrammarFromParse(false);
+ // if the new grammar has to be cached, better use the already cached
+ // grammars, or an exception will be thrown when caching an already
+ // cached grammar
+ fGrammarResolver->useCachedGrammarInParse(toCache);
+ fRootGrammar = 0;
+
+ if (fValScheme == Val_Auto) {
+ fValidate = true;
+ }
+
+ // Reset some status flags
+ fInException = false;
+ fStandalone = false;
+ fErrorCount = 0;
+ fHasNoDTD = true;
+ fSeeXsi = false;
+ // Dispatch on the requested grammar type; an unrecognised type leaves loadedGrammar at 0.
+ if (grammarType == Grammar::SchemaGrammarType) {
+ loadedGrammar = loadXMLSchemaGrammar(src, toCache);
+ }
+ else if (grammarType == Grammar::DTDGrammarType) {
+ loadedGrammar = loadDTDGrammar(src, toCache);
+ }
+ }
+ // NOTE:
+ //
+ // In all of the error processing below, the emitError() call MUST come
+ // before the flush of the reader mgr, or it will fail because it tries
+ // to find out the position in the XML source of the error.
+ catch(const XMLErrs::Codes)
+ {
+ // This is a 'first fatal error' type exit, so fall through
+ }
+ catch(const XMLValid::Codes)
+ {
+ // This is a 'first fatal error' type exit, so fall through
+ }
+ catch(const XMLException& excToCatch)
+ {
+ // Emit the error and catch any user exception thrown from here. Make
+ // sure in all cases we flush the reader manager.
+ fInException = true;
+ try
+ {
+ if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) // warning -> XMLException_Warning
+ emitError
+ (
+ XMLErrs::XMLException_Warning
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) // fatal or above -> XMLException_Fatal
+ emitError
+ (
+ XMLErrs::XMLException_Fatal
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ else // everything else -> XMLException_Error
+ emitError
+ (
+ XMLErrs::XMLException_Error
+ , excToCatch.getCode()
+ , excToCatch.getMessage()
+ );
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw;
+ }
+ }
+ catch(const OutOfMemoryException&)
+ {
+ // This is a special case for out-of-memory
+ // conditions, because resetting the ReaderMgr
+ // can be problematic.
+ resetReaderMgr.release();
+
+ throw;
+ }
+ // Reached on success and after any exception handled above that was not rethrown.
+ return loadedGrammar;
+}
+
+void IGXMLScanner::resetCachedGrammar ()
+{
+    // Remove every entry from fCachedSchemaInfoList.
+    fCachedSchemaInfoList->removeAll();
+}
+
+Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src, // source the DTD is read from; its system/public ids are also used below
+ const bool toCache) // when true the grammar is re-registered under src's system id and cacheGrammars() is called at the end
+{ // Scans src as an external DTD subset into fDTDGrammar and returns fDTDGrammar.
+ // Reset the validators
+ fDTDValidator->reset();
+ if (fValidatorFromUser)
+ fValidator->reset();
+ // A current validator that cannot handle DTDs is replaced by fDTDValidator, unless it is user-supplied and validation is on, which is reported as an error.
+ if (!fValidator->handlesDTD()) {
+ if (fValidatorFromUser && fValidate)
+ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
+ else {
+ fValidator = fDTDValidator;
+ }
+ }
+ // Reuse (after a reset) the grammar the resolver already holds under fgDTDEntityString; otherwise create one and register it.
+ fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
+
+ if (fDTDGrammar) {
+ fDTDGrammar->reset();
+ }
+ else {
+ fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
+ fGrammarResolver->putGrammar(fDTDGrammar);
+ }
+ // From here on the DTD grammar is the current grammar of both the scanner and the validator.
+ fGrammar = fDTDGrammar;
+ fGrammarType = fGrammar->getGrammarType();
+ fValidator->setGrammar(fGrammar);
+
+ // And for all installed handlers, send reset events. This gives them
+ // a chance to flush any cached data.
+ if (fDocHandler)
+ fDocHandler->resetDocument();
+ if (fEntityHandler)
+ fEntityHandler->resetEntities();
+ if (fErrorReporter)
+ fErrorReporter->resetErrors();
+
+ // Clear out the id reference list
+ resetValidationContext();
+ // and clear out the darned undeclared DTD element pool...
+ fDTDElemNonDeclPool->removeAll();
+
+ if (toCache) {
+ // Intern src's system id in the resolver's string pool, then orphan the grammar, stamp the id on its description and register it again.
+ unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
+ const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
+
+ fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
+ ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
+ fGrammarResolver->putGrammar(fGrammar);
+ }
+
+ // Handle the creation of the XML reader object for this input source.
+ // This will provide us with transcoding and basic lexing services.
+ XMLReader* newReader = fReaderMgr.createReader
+ (
+ src
+ , false
+ , XMLReader::RefFrom_NonLiteral
+ , XMLReader::Type_General
+ , XMLReader::Source_External
+ , fCalculateSrcOfs
+ , fLowWaterMark
+ );
+ if (!newReader) { // no reader could be created: always throws, the source only chooses which message code is used
+ if (src.getIssueFatalErrorIfNotFound())
+ ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
+ else
+ ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
+ }
+
+ // In order to make the processing work consistently, we have to
+ // make this look like an external entity. So create an entity
+ // decl and fill it in and push it with the reader, as happens
+ // with an external entity. Put a janitor on it to insure it gets
+ // cleaned up. The reader manager does not adopt them.
+ const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
+ DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
+ declDTD->setSystemId(src.getSystemId());
+ declDTD->setIsExternal(true);
+ /* NOTE(review): despite the comment above, nothing visible in this
+  * function puts a janitor on declDTD or deletes it. The commit message
+  * mentions a use-after-free fix (CVE-2018-1311), so the omission is
+  * presumably deliberate - confirm before adding a janitor back.
+  */
+ // Mark this one as a throw at end
+ newReader->setThrowAtEnd(true);
+
+ // And push it onto the stack, with its pseudo name
+ fReaderMgr.pushReader(newReader, declDTD);
+
+ // If we have a doc type handler and advanced callbacks are enabled,
+ // call the doctype event.
+ if (fDocTypeHandler) {
+
+ // Create a dummy root
+ DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
+ (
+ gDTDStr
+ , fEmptyNamespaceId
+ , DTDElementDecl::Any
+ , fGrammarPoolMemoryManager
+ );
+ rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
+ rootDecl->setExternalElemDeclaration(true);
+ Janitor<DTDElementDecl> janSrc(rootDecl); // presumably deletes rootDecl when this scope ends - confirm against Janitor
+
+ fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
+ }
+
+ // Create DTDScanner
+ DTDScanner dtdScanner
+ (
+ (DTDGrammar*) fGrammar
+ , fDocTypeHandler
+ , fGrammarPoolMemoryManager
+ , fMemoryManager
+ );
+ dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
+
+ // Tell it its not in an include section
+ dtdScanner.scanExtSubsetDecl(false, true);
+
+ if (fValidate) {
+ // validate the DTD scan so far
+ fValidator->preContentValidation(false, true);
+ }
+
+ if (toCache)
+ fGrammarResolver->cacheGrammars();
+
+ return fDTDGrammar;
+}
+
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Helper methods
+// ---------------------------------------------------------------------------
+void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc) // schemaLoc is modified in place, see below
+{
+ XMLCh* locStr = schemaLoc;
+ XMLReader* curReader = fReaderMgr.getCurrentReader();
+ /* Splits schemaLoc in place into whitespace-separated tokens.
+  * fLocationPairs is emptied first and then receives, in order, a pointer
+  * to the start of each token; these pointers refer into schemaLoc's own
+  * buffer. Whitespace is classified by the current reader's isWhitespace().
+  * Every whitespace character skipped between tokens is overwritten with
+  * chNull, which is what terminates the preceding token.
+  */
+ fLocationPairs->removeAllElements();
+ while (*locStr)
+ {
+ do { // phase 1: skip, and null out, the whitespace in front of the next token
+ // Do we have an escaped character ?
+ if (*locStr == 0xFFFF)
+ continue; // jumps to the while condition below, so 0xFFFF is stepped over: not nulled, and it does not start a token
+
+ if (!curReader->isWhitespace(*locStr))
+ break;
+
+ *locStr = chNull;
+ } while (*++locStr);
+
+ if (*locStr) {
+ // phase 2: locStr is at the first character of a token; record it, then advance to the whitespace or terminator that ends it
+ fLocationPairs->addElement(locStr);
+
+ while (*++locStr) {
+ // Do we have an escaped character ?
+ if (*locStr == 0xFFFF)
+ continue; // 0xFFFF never ends a token
+ if (curReader->isWhitespace(*locStr))
+ break; // left in place here; phase 1 of the next outer iteration overwrites it with chNull
+ }
+ }
+ }
+}
+
+void IGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
+                                  DatatypeValidator* const memberDV)
+{
+    // Fills fPSVIElement from the current PSVI element context and reports
+    // it to fPSVIHandler for elemDecl; memberDV may be 0. The PSVI element
+    // depth is decremented on the way out.
+
+    // How much validation was attempted, judged by comparing the current
+    // depth with the recorded full / none validation depths.
+    PSVIElement::ASSESSMENT_TYPE attempted = PSVIElement::VALIDATION_PARTIAL;
+    if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
+    {
+        attempted = PSVIElement::VALIDATION_FULL;
+    }
+    else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
+    {
+        attempted = PSVIElement::VALIDATION_NONE;
+    }
+    else
+    {
+        // Partial assessment: both recorded depths move to the parent level.
+        fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
+        fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fNoneValidationDepth;
+    }
+
+    // Validity is only known when validating a declared element.
+    PSVIElement::VALIDITY_STATE validityState = PSVIElement::VALIDITY_NOTKNOWN;
+    if (fValidate && elemDecl->isDeclared())
+    {
+        if (fPSVIElemContext.fErrorOccurred)
+            validityState = PSVIElement::VALIDITY_INVALID;
+        else
+            validityState = PSVIElement::VALIDITY_VALID;
+    }
+
+    // Resolve the type definition: the complex type info wins, otherwise
+    // the current datatype validator is used if there is one.
+    XSTypeDefinition* resolvedType = 0;
+    bool mixedContent = false;
+    if (fPSVIElemContext.fCurrentTypeInfo)
+    {
+        resolvedType = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
+        const SchemaElementDecl::ModelTypes contentModel =
+            (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
+        switch (contentModel)
+        {
+        case SchemaElementDecl::Mixed_Simple:
+        case SchemaElementDecl::Mixed_Complex:
+            mixedContent = true;
+            break;
+        default:
+            break;
+        }
+    }
+    else if (fPSVIElemContext.fCurrentDV)
+    {
+        resolvedType = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
+    }
+
+    // A canonical value is produced only for a valid, non-mixed element
+    // that has a normalized value; the member validator takes precedence
+    // over the current one.
+    XMLCh* canonical = 0;
+    const bool wantCanonical = fPSVIElemContext.fNormalizedValue
+                            && !mixedContent
+                            && validityState == PSVIElement::VALIDITY_VALID;
+    if (wantCanonical)
+    {
+        if (memberDV)
+        {
+            canonical = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
+        }
+        else if (fPSVIElemContext.fCurrentDV)
+        {
+            canonical = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
+        }
+    }
+
+    fPSVIElement->reset
+    (
+        validityState
+        , attempted
+        , fRootElemName
+        , fPSVIElemContext.fIsSpecified
+        , (elemDecl->isDeclared())
+            ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
+        , resolvedType
+        , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
+        , fModel
+        , elemDecl->getDefaultValue()
+        , fPSVIElemContext.fNormalizedValue
+        , canonical
+    );
+
+    fPSVIHandler->handleElementPSVI
+    (
+        elemDecl->getBaseName()
+        , fURIStringPool->getValueForId(elemDecl->getURI())
+        , fPSVIElement
+    );
+
+    // This element is finished: step back up one level.
+    fPSVIElemContext.fElemDepth--;
+
+}
+
+void IGXMLScanner::resetPSVIElemContext()
+{
+    // Puts every field of fPSVIElemContext back to its initial value.
+
+    // Depth bookkeeping starts at -1.
+    fPSVIElemContext.fElemDepth = -1;
+    fPSVIElemContext.fFullValidationDepth = -1;
+    fPSVIElemContext.fNoneValidationDepth = -1;
+
+    // Status flags are cleared.
+    fPSVIElemContext.fErrorOccurred = false;
+    fPSVIElemContext.fIsSpecified = false;
+
+    // No current type information or value.
+    fPSVIElemContext.fCurrentTypeInfo = 0;
+    fPSVIElemContext.fCurrentDV = 0;
+    fPSVIElemContext.fNormalizedValue = 0;
+}
+
+XERCES_CPP_NAMESPACE_END
diff --git a/libxerces-c/xercesc/internal/IGXMLScanner.hpp b/libxerces-c/xercesc/internal/IGXMLScanner.hpp
new file mode 120000
index 0000000..1bb7488
--- /dev/null
+++ b/libxerces-c/xercesc/internal/IGXMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/IGXMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/IGXMLScanner2.cpp b/libxerces-c/xercesc/internal/IGXMLScanner2.cpp
new file mode 120000
index 0000000..f375282
--- /dev/null
+++ b/libxerces-c/xercesc/internal/IGXMLScanner2.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/IGXMLScanner2.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/MemoryManagerImpl.cpp b/libxerces-c/xercesc/internal/MemoryManagerImpl.cpp
new file mode 120000
index 0000000..ec40c4f
--- /dev/null
+++ b/libxerces-c/xercesc/internal/MemoryManagerImpl.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/MemoryManagerImpl.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/MemoryManagerImpl.hpp b/libxerces-c/xercesc/internal/MemoryManagerImpl.hpp
new file mode 120000
index 0000000..bd7d337
--- /dev/null
+++ b/libxerces-c/xercesc/internal/MemoryManagerImpl.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/MemoryManagerImpl.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ReaderMgr.cpp b/libxerces-c/xercesc/internal/ReaderMgr.cpp
new file mode 120000
index 0000000..6604eb1
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ReaderMgr.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ReaderMgr.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ReaderMgr.hpp b/libxerces-c/xercesc/internal/ReaderMgr.hpp
new file mode 120000
index 0000000..5a47793
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ReaderMgr.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ReaderMgr.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/SGXMLScanner.cpp b/libxerces-c/xercesc/internal/SGXMLScanner.cpp
new file mode 120000
index 0000000..ddbe8e7
--- /dev/null
+++ b/libxerces-c/xercesc/internal/SGXMLScanner.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/SGXMLScanner.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/SGXMLScanner.hpp b/libxerces-c/xercesc/internal/SGXMLScanner.hpp
new file mode 120000
index 0000000..6e75991
--- /dev/null
+++ b/libxerces-c/xercesc/internal/SGXMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/SGXMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ValidationContextImpl.cpp b/libxerces-c/xercesc/internal/ValidationContextImpl.cpp
new file mode 120000
index 0000000..4688372
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ValidationContextImpl.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ValidationContextImpl.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/ValidationContextImpl.hpp b/libxerces-c/xercesc/internal/ValidationContextImpl.hpp
new file mode 120000
index 0000000..c3c006f
--- /dev/null
+++ b/libxerces-c/xercesc/internal/ValidationContextImpl.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/ValidationContextImpl.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/VecAttrListImpl.cpp b/libxerces-c/xercesc/internal/VecAttrListImpl.cpp
new file mode 120000
index 0000000..5c6f8c6
--- /dev/null
+++ b/libxerces-c/xercesc/internal/VecAttrListImpl.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/VecAttrListImpl.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/VecAttrListImpl.hpp b/libxerces-c/xercesc/internal/VecAttrListImpl.hpp
new file mode 120000
index 0000000..a670e40
--- /dev/null
+++ b/libxerces-c/xercesc/internal/VecAttrListImpl.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/VecAttrListImpl.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/VecAttributesImpl.cpp b/libxerces-c/xercesc/internal/VecAttributesImpl.cpp
new file mode 120000
index 0000000..32ecab5
--- /dev/null
+++ b/libxerces-c/xercesc/internal/VecAttributesImpl.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/VecAttributesImpl.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/VecAttributesImpl.hpp b/libxerces-c/xercesc/internal/VecAttributesImpl.hpp
new file mode 120000
index 0000000..12fc329
--- /dev/null
+++ b/libxerces-c/xercesc/internal/VecAttributesImpl.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/VecAttributesImpl.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/WFXMLScanner.cpp b/libxerces-c/xercesc/internal/WFXMLScanner.cpp
new file mode 120000
index 0000000..490c388
--- /dev/null
+++ b/libxerces-c/xercesc/internal/WFXMLScanner.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/WFXMLScanner.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/WFXMLScanner.hpp b/libxerces-c/xercesc/internal/WFXMLScanner.hpp
new file mode 120000
index 0000000..42a74d3
--- /dev/null
+++ b/libxerces-c/xercesc/internal/WFXMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/WFXMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLInternalErrorHandler.hpp b/libxerces-c/xercesc/internal/XMLInternalErrorHandler.hpp
new file mode 120000
index 0000000..9c60697
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLInternalErrorHandler.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLInternalErrorHandler.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLReader.cpp b/libxerces-c/xercesc/internal/XMLReader.cpp
new file mode 120000
index 0000000..bc272be
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLReader.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLReader.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLReader.hpp b/libxerces-c/xercesc/internal/XMLReader.hpp
new file mode 120000
index 0000000..83d2e6a
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLReader.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLReader.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLScanner.cpp b/libxerces-c/xercesc/internal/XMLScanner.cpp
new file mode 120000
index 0000000..0838aee
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLScanner.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLScanner.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLScanner.hpp b/libxerces-c/xercesc/internal/XMLScanner.hpp
new file mode 120000
index 0000000..f7b5e38
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLScannerResolver.cpp b/libxerces-c/xercesc/internal/XMLScannerResolver.cpp
new file mode 120000
index 0000000..531bca6
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLScannerResolver.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLScannerResolver.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XMLScannerResolver.hpp b/libxerces-c/xercesc/internal/XMLScannerResolver.hpp
new file mode 120000
index 0000000..0986799
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XMLScannerResolver.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XMLScannerResolver.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XProtoType.cpp b/libxerces-c/xercesc/internal/XProtoType.cpp
new file mode 120000
index 0000000..7b084aa
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XProtoType.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XProtoType.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XProtoType.hpp b/libxerces-c/xercesc/internal/XProtoType.hpp
new file mode 120000
index 0000000..1328265
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XProtoType.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XProtoType.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSAXMLScanner.cpp b/libxerces-c/xercesc/internal/XSAXMLScanner.cpp
new file mode 120000
index 0000000..2bfbe1f
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSAXMLScanner.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSAXMLScanner.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSAXMLScanner.hpp b/libxerces-c/xercesc/internal/XSAXMLScanner.hpp
new file mode 120000
index 0000000..6690184
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSAXMLScanner.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSAXMLScanner.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSObjectFactory.cpp b/libxerces-c/xercesc/internal/XSObjectFactory.cpp
new file mode 120000
index 0000000..108a8a0
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSObjectFactory.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSObjectFactory.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSObjectFactory.hpp b/libxerces-c/xercesc/internal/XSObjectFactory.hpp
new file mode 120000
index 0000000..8970527
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSObjectFactory.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSObjectFactory.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSerializable.hpp b/libxerces-c/xercesc/internal/XSerializable.hpp
new file mode 120000
index 0000000..54583e9
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSerializable.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSerializable.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSerializationException.hpp b/libxerces-c/xercesc/internal/XSerializationException.hpp
new file mode 120000
index 0000000..b5d1f6b
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSerializationException.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSerializationException.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSerializeEngine.cpp b/libxerces-c/xercesc/internal/XSerializeEngine.cpp
new file mode 120000
index 0000000..ecf86e5
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSerializeEngine.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSerializeEngine.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XSerializeEngine.hpp b/libxerces-c/xercesc/internal/XSerializeEngine.hpp
new file mode 120000
index 0000000..b776cc1
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XSerializeEngine.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XSerializeEngine.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XTemplateSerializer.cpp b/libxerces-c/xercesc/internal/XTemplateSerializer.cpp
new file mode 120000
index 0000000..bbb09ee
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XTemplateSerializer.cpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XTemplateSerializer.cpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/internal/XTemplateSerializer.hpp b/libxerces-c/xercesc/internal/XTemplateSerializer.hpp
new file mode 120000
index 0000000..9ae5884
--- /dev/null
+++ b/libxerces-c/xercesc/internal/XTemplateSerializer.hpp
@@ -0,0 +1 @@
+../../../upstream/src/xercesc/internal/XTemplateSerializer.hpp \ No newline at end of file
diff --git a/libxerces-c/xercesc/util/.gitattributes b/libxerces-c/xercesc/util/.gitattributes
new file mode 100644
index 0000000..05b35fc
--- /dev/null
+++ b/libxerces-c/xercesc/util/.gitattributes
@@ -0,0 +1,2 @@
+FileManagers symlink=dir
+regx symlink=dir
diff --git a/libxerces-c/xercesc/util/KVStringPair.hpp b/libxerces-c/xercesc/util/KVStringPair.hpp
index 32d0edd..87e2947 120000..100644
--- a/libxerces-c/xercesc/util/KVStringPair.hpp
+++ b/libxerces-c/xercesc/util/KVStringPair.hpp
@@ -1 +1,223 @@
-../../../upstream/src/xercesc/util/KVStringPair.hpp \ No newline at end of file
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * $Id$
+ */
+
+#if !defined(XERCESC_INCLUDE_GUARD_KVSTRINGPAIR_HPP)
+#define XERCESC_INCLUDE_GUARD_KVSTRINGPAIR_HPP
+
+#include <xercesc/util/XMemory.hpp>
+#include <xercesc/util/PlatformUtils.hpp>
+
+#include <xercesc/internal/XSerializable.hpp>
+
+XERCES_CPP_NAMESPACE_BEGIN
+
+//
+// This class provides a commonly used data structure, which is that of
+// a pair of strings which represent a 'key=value' type mapping. It works
+// only in terms of XMLCh type raw strings.
+//
+class XMLUTIL_EXPORT KVStringPair : public XSerializable, public XMemory
+{
+public:
+ // -----------------------------------------------------------------------
+ // Constructors and Destructor
+ // -----------------------------------------------------------------------
+ KVStringPair(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
+ KVStringPair
+ (
+ const XMLCh* const key
+ , const XMLCh* const value
+ , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
+ );
+ KVStringPair
+ (
+ const XMLCh* const key
+ , const XMLCh* const value
+ , const XMLSize_t valueLength
+ , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
+ );
+ KVStringPair
+ (
+ const XMLCh* const key
+ , const XMLSize_t keyLength
+ , const XMLCh* const value
+ , const XMLSize_t valueLength
+ , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
+ );
+ KVStringPair(const KVStringPair& toCopy);
+ ~KVStringPair();
+
+
+ // -----------------------------------------------------------------------
+ // Getters
+ //
+ // We support both const and non-const access to the key and the value.
+ // -----------------------------------------------------------------------
+ const XMLCh* getKey() const;
+ XMLCh* getKey();
+ const XMLCh* getValue() const;
+ XMLCh* getValue();
+
+
+ // -----------------------------------------------------------------------
+ // Setters
+ // -----------------------------------------------------------------------
+ void setKey(const XMLCh* const newKey);
+ void setValue(const XMLCh* const newValue);
+ void setKey
+ (
+ const XMLCh* const newKey
+ , const XMLSize_t newKeyLength
+ );
+ void setValue
+ (
+ const XMLCh* const newValue
+ , const XMLSize_t newValueLength
+ );
+ void set
+ (
+ const XMLCh* const newKey
+ , const XMLCh* const newValue
+ );
+ void set
+ (
+ const XMLCh* const newKey
+ , const XMLSize_t newKeyLength
+ , const XMLCh* const newValue
+ , const XMLSize_t newValueLength
+ );
+
+ /***
+ * Support for Serialization/De-serialization
+ ***/
+ DECL_XSERIALIZABLE(KVStringPair)
+
+private :
+ // unimplemented: the copy assignment operator below is declared private
+
+ KVStringPair& operator=(const KVStringPair&);
+ // -----------------------------------------------------------------------
+ // Private data members
+ //
+ // fKey
+ // The string that represents the key field of this object.
+ //
+ // fKeyAllocSize
+ // The number of XMLCh elements (not bytes) allocated for fKey.
+ //
+ // fValue
+ // The string that represents the value of this pair object.
+ //
+ // fValueAllocSize
+ // The number of XMLCh elements (not bytes) allocated for fValue.
+ //
+ // -----------------------------------------------------------------------
+ XMLSize_t fKeyAllocSize;
+ XMLSize_t fValueAllocSize;
+ XMLCh* fKey;
+ XMLCh* fValue;
+ MemoryManager* fMemoryManager; // the setters allocate and deallocate fKey/fValue through this
+};
+
+// ---------------------------------------------------------------------------
+// KVStringPair: Getters
+// ---------------------------------------------------------------------------
+inline const XMLCh* KVStringPair::getKey() const
+{
+ return fKey; // read-only pointer to the stored key buffer; no copy is made
+}
+
+inline XMLCh* KVStringPair::getKey()
+{
+ return fKey; // writable pointer to the same stored key buffer; no copy is made
+}
+
+inline const XMLCh* KVStringPair::getValue() const
+{
+ return fValue; // read-only pointer to the stored value buffer; no copy is made
+}
+
+inline XMLCh* KVStringPair::getValue()
+{
+ return fValue; // writable pointer to the same stored value buffer; no copy is made
+}
+
+// ---------------------------------------------------------------------------
+// KVStringPair: Setters
+// ---------------------------------------------------------------------------
+inline void KVStringPair::setKey( const XMLCh* const newKey
+ , const XMLSize_t newKeyLength) // copies newKeyLength+1 XMLCh from newKey into fKey, growing fKey if needed
+{
+ if (newKeyLength >= fKeyAllocSize) // current buffer cannot hold newKeyLength chars plus the terminator
+ {
+ fMemoryManager->deallocate(fKey); // release the old buffer (was: delete [] fKey)
+ fKey = 0;
+ fKeyAllocSize = newKeyLength + 1; // counted in XMLCh elements, terminator included
+ fKey = (XMLCh*) fMemoryManager->allocate(fKeyAllocSize * sizeof(XMLCh)); // was: new XMLCh[fKeyAllocSize]
+ }
+
+ memcpy(fKey, newKey, (newKeyLength+1) * sizeof(XMLCh)); // len+1 also copies newKey[newKeyLength]; assumes that element is the terminating 0 -- TODO confirm with callers
+}
+
+inline void KVStringPair::setValue( const XMLCh* const newValue
+ , const XMLSize_t newValueLength) // copies newValueLength+1 XMLCh from newValue into fValue, growing fValue if needed
+{
+ if (newValueLength >= fValueAllocSize) // current buffer cannot hold newValueLength chars plus the terminator
+ {
+ fMemoryManager->deallocate(fValue); // release the old buffer (was: delete [] fValue)
+ fValue = 0;
+ fValueAllocSize = newValueLength + 1; // counted in XMLCh elements, terminator included
+ fValue = (XMLCh*) fMemoryManager->allocate(fValueAllocSize * sizeof(XMLCh)); // was: new XMLCh[fValueAllocSize]
+ }
+
+ memcpy(fValue, newValue, (newValueLength+1) * sizeof(XMLCh)); // len+1 also copies newValue[newValueLength]; assumes that element is the terminating 0 -- TODO confirm with callers
+}
+
+inline void KVStringPair::setKey(const XMLCh* const newKey)
+{
+ setKey(newKey, XMLString::stringLen(newKey)); // measures the length, then calls the length-taking overload defined earlier in this header
+}
+
+inline void KVStringPair::setValue(const XMLCh* const newValue)
+{
+ setValue(newValue, XMLString::stringLen(newValue)); // measures the length, then calls the length-taking overload defined earlier in this header
+}
+
+inline void KVStringPair::set( const XMLCh* const newKey
+ , const XMLCh* const newValue) // replaces both strings; lengths are measured with XMLString::stringLen
+{
+ setKey(newKey, XMLString::stringLen(newKey)); // key is set first, then the value
+ setValue(newValue, XMLString::stringLen(newValue));
+}
+
+inline void KVStringPair::set( const XMLCh* const newKey
+ , const XMLSize_t newKeyLength
+ , const XMLCh* const newValue
+ , const XMLSize_t newValueLength) // replaces both strings using the caller-supplied lengths
+{
+ setKey(newKey, newKeyLength); // forwards to the length-taking setters; key first, then value
+ setValue(newValue, newValueLength);
+}
+
+
+XERCES_CPP_NAMESPACE_END
+
+#endif