week06
This commit is contained in:
950
env/lib/python3.12/site-packages/lxml/xslt.pxi
vendored
Normal file
950
env/lib/python3.12/site-packages/lxml/xslt.pxi
vendored
Normal file
@ -0,0 +1,950 @@
|
||||
# XSLT
|
||||
from lxml.includes cimport xslt
|
||||
|
||||
|
||||
cdef class XSLTError(LxmlError):
    """Common base class of all XSLT-related errors."""
|
||||
|
||||
cdef class XSLTParseError(XSLTError):
    """Raised when a stylesheet document cannot be parsed."""
|
||||
|
||||
cdef class XSLTApplyError(XSLTError):
    """Raised when running an XSL transformation fails."""
|
||||
|
||||
class XSLTSaveError(XSLTError, SerialisationError):
    """Raised when an XSLT result cannot be serialised."""
|
||||
|
||||
cdef class XSLTExtensionError(XSLTError):
    """Raised when registering an XSLT extension fails."""
|
||||
|
||||
|
||||
# version information
# LIBXSLT_COMPILED_VERSION is derived from the LIBXSLT_VERSION constant of the
# libxslt declarations, LIBXSLT_VERSION from the xsltLibxsltVersion value.
# NOTE(review): presumably "compiled against" vs. "running with" - confirm.
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
|
||||
|
||||
|
||||
################################################################################
# Where do we store what?
#
# xsltStylesheet->doc->_private
#    == _XSLTResolverContext for XSL stylesheet
#
# xsltTransformContext->_private
#    == _XSLTResolverContext for transformed document
#
################################################################################
|
||||
|
||||
|
||||
################################################################################
|
||||
# XSLT document loaders
|
||||
|
||||
@cython.final
@cython.internal
cdef class _XSLTResolverContext(_ResolverContext):
    """Resolver context that also carries the stylesheet document pointer
    and the parser used when loading documents.
    """
    cdef xmlDoc* _c_style_doc
    cdef _BaseParser _parser

    cdef _XSLTResolverContext _copy(self):
        # Create a fresh context for the same parser.  The stylesheet
        # document pointer is shared with this context, not duplicated.
        cdef _XSLTResolverContext clone = _XSLTResolverContext()
        _initXSLTResolverContext(clone, self._parser)
        clone._c_style_doc = self._c_style_doc
        return clone
|
||||
|
||||
cdef _initXSLTResolverContext(_XSLTResolverContext context,
                              _BaseParser parser):
    # Initialise the base resolver state from the parser's resolvers, then
    # attach the parser itself.  A newly initialised context does not refer
    # to any stylesheet document yet.
    _initResolverContext(context, parser.resolvers)
    context._c_style_doc = NULL
    context._parser = parser
|
||||
|
||||
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
                                       int parse_options, int* error) with gil:
    # Resolve ``c_uri`` by calling the Python document loaders.
    #
    # ``c_context`` must point to an _XSLTResolverContext.  ``error[0]`` is
    # reset to 0 on entry and set to 1 when resolving raised an exception;
    # the exception is stored on the context rather than propagated.
    # Returns the resolved document or NULL if nothing was resolved.
    cdef _XSLTResolverContext context
    cdef _ResolverRegistry registry
    cdef _InputDocument resolved
    cdef xmlDoc* c_style_doc
    cdef xmlDoc* c_return_doc = NULL

    error[0] = 0
    context = <_XSLTResolverContext>c_context
    c_style_doc = context._c_style_doc

    try:
        # shortcut if we resolve the stylesheet itself: return a copy of it
        if (c_style_doc is not NULL and c_style_doc.URL is not NULL
                and tree.xmlStrcmp(c_uri, c_style_doc.URL) == 0):
            c_return_doc = _copyDoc(c_style_doc, 1)
            return c_return_doc  # 'goto', see 'finally' below

        # delegate to the Python resolvers
        registry = context._resolvers
        # skip the 26 character prefix of generated stylesheet URLs
        if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
            c_uri += 26
        resolved = registry.resolve(_decodeFilename(c_uri), None, context)

        if resolved is not None:
            # build the document according to what the resolver handed back
            if resolved._type == PARSER_DATA_STRING:
                c_return_doc = _parseDoc(
                    resolved._data_bytes, resolved._filename, context._parser)
            elif resolved._type == PARSER_DATA_FILENAME:
                c_return_doc = _parseDocFromFile(
                    resolved._filename, context._parser)
            elif resolved._type == PARSER_DATA_FILE:
                c_return_doc = _parseDocFromFilelike(
                    resolved._file, resolved._filename, context._parser)
            elif resolved._type == PARSER_DATA_EMPTY:
                c_return_doc = _newXMLDoc()
            # make sure the resolved document carries a URL
            if c_return_doc is not NULL and c_return_doc.URL is NULL:
                c_return_doc.URL = tree.xmlStrdup(c_uri)
    except:
        error[0] = 1
        context._store_raised()
    finally:
        return c_return_doc  # and swallow any further exceptions
|
||||
|
||||
|
||||
cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
                                         xslt.xsltLoadType c_type) noexcept with gil:
    # Record a "cannot resolve" error on the resolver context behind
    # ``context``: an XSLTApplyError for documents loaded during a
    # transformation, an XSLTParseError for every other load type.
    # Never raises; a failure to build the error is stored instead.
    try:
        message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
        if c_type != xslt.XSLT_LOAD_DOCUMENT:
            exception = XSLTParseError(message)
        else:
            exception = XSLTApplyError(message)
        (<_XSLTResolverContext>context)._store_exception(exception)
    except BaseException as e:
        (<_XSLTResolverContext>context)._store_exception(e)
    finally:
        return  # and swallow any further exceptions
|
||||
|
||||
|
||||
cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
                              int parse_options, void* c_ctxt,
                              xslt.xsltLoadType c_type) noexcept nogil:
    # Document loader callback: tries the Python resolvers first and falls
    # back to the default loader when they did not resolve the URI.
    # nogil => no Python objects here, may be called without thread context !
    cdef xmlDoc* c_loaded
    cdef void* c_resolver_context = NULL
    cdef int error = 0

    # find resolver contexts of stylesheet and transformed doc
    if c_type == xslt.XSLT_LOAD_DOCUMENT:
        # transformation time
        c_resolver_context = (<xslt.xsltTransformContext*>c_ctxt)._private
    elif c_type == xslt.XSLT_LOAD_STYLESHEET:
        # include/import resolution while parsing
        c_resolver_context = (<xslt.xsltStylesheet*>c_ctxt).doc._private

    if c_resolver_context is NULL:
        # can't call Python without context, fall back to default loader
        return XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)

    c_loaded = _xslt_resolve_from_python(
        c_uri, c_resolver_context, parse_options, &error)
    if c_loaded is NULL and not error:
        # the Python resolvers had no answer (and did not fail)
        c_loaded = XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)
        if c_loaded is NULL:
            _xslt_store_resolver_exception(c_uri, c_resolver_context, c_type)

    if c_loaded is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
        # pass the resolver context on to the loaded stylesheet document
        c_loaded._private = c_resolver_context
    return c_loaded
|
||||
|
||||
# Remember the loader that was active before installing our own, so that
# _xslt_doc_loader() can fall back to it.  The order of these two
# statements matters.
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
|
||||
|
||||
################################################################################
|
||||
# XSLT file/network access control
|
||||
|
||||
cdef class XSLTAccessControl:
    """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)

    Access control for XSLT: reading/writing files, directories and
    network I/O.  Each kind of resource access is granted or denied
    through one of the following boolean keyword arguments.  They all
    default to True, i.e. access is allowed.

    - read_file
    - write_file
    - create_dir
    - read_network
    - write_network

    For convenience, the class member `DENY_ALL` provides a readily
    configured XSLTAccessControl instance that denies everything, and
    the `DENY_WRITE` member denies all write access but allows read
    access.

    See `XSLT`.
    """
    cdef xslt.xsltSecurityPrefs* _prefs

    def __cinit__(self):
        self._prefs = xslt.xsltNewSecurityPrefs()
        if self._prefs is NULL:
            raise MemoryError()

    def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
                 bint read_network=True, bint write_network=True):
        self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
        self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
        self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)

    DENY_ALL = XSLTAccessControl(
        read_file=False, write_file=False, create_dir=False,
        read_network=False, write_network=False)

    DENY_WRITE = XSLTAccessControl(
        read_file=True, write_file=False, create_dir=False,
        read_network=True, write_network=False)

    def __dealloc__(self):
        if self._prefs is not NULL:
            xslt.xsltFreeSecurityPrefs(self._prefs)

    @cython.final
    cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
        # Install the "allow" or "forbid" check function for one option.
        cdef xslt.xsltSecurityCheck check
        if not allow:
            check = xslt.xsltSecurityForbid
        else:
            check = xslt.xsltSecurityAllow
        xslt.xsltSetSecurityPrefs(self._prefs, option, check)

    @cython.final
    cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt) noexcept:
        # Apply these preferences to a transformation context.
        xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)

    @property
    def options(self):
        """The access control configuration as a map of options."""
        configuration = {
            'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
            'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
            'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
            'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
            'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
        }
        return configuration

    @cython.final
    cdef _optval(self, xslt.xsltSecurityOption option):
        # Map the check function registered for an option back to
        # True (allow), False (forbid) or None (anything else).
        cdef xslt.xsltSecurityCheck check
        check = xslt.xsltGetSecurityPrefs(self._prefs, option)
        if check is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
            return True
        if check is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
            return False
        return None

    def __repr__(self):
        class_name = python._fqtypename(self).decode('UTF-8').split('.')[-1]
        settings = ', '.join([
            f"{name}={value!r}" for name, value in sorted(self.options.items())])
        return f"{class_name}({settings})"
|
||||
|
||||
################################################################################
|
||||
# XSLT
|
||||
|
||||
cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Register one extension function with the transformation context
    # behind ``ctxt``.  Functions without a namespace are skipped and
    # reported as 0.
    if ns_utf is not None:
        # libxml2 internalises the strings if ctxt has a dict
        return xslt.xsltRegisterExtFunction(
            <xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
            <xslt.xmlXPathFunction>_xpath_function_call)
    return 0
|
||||
|
||||
# Shared placeholder for "no extension elements"; _XSLTContext compares
# against it by identity before creating a dict of its own.
cdef dict EMPTY_DICT = {}
|
||||
|
||||
@cython.final
@cython.internal
cdef class _XSLTContext(_BaseContext):
    """Evaluation context of a transformation that additionally keeps
    track of the registered XSLT extension elements.
    """
    cdef xslt.xsltTransformContext* _xsltCtxt
    cdef _ReadOnlyElementProxy _extension_element_proxy
    cdef dict _extension_elements

    def __cinit__(self):
        self._xsltCtxt = NULL
        self._extension_elements = EMPTY_DICT

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        # Split the extension elements off from the extension functions:
        # elements are kept in self._extension_elements, everything else
        # is passed on to the base class.
        if extensions:
            for ns_name_tuple, extension in extensions.items():
                if ns_name_tuple[0] is None:
                    raise XSLTExtensionError(
                        "extensions must not have empty namespaces")
                if not isinstance(extension, XSLTExtension):
                    continue
                if self._extension_elements is EMPTY_DICT:
                    # first extension element found: switch to private
                    # dicts so that the caller's mapping is not modified
                    self._extension_elements = {}
                    extensions = extensions.copy()
                ns_utf = _utf8(ns_name_tuple[0])
                name_utf = _utf8(ns_name_tuple[1])
                self._extension_elements[(ns_utf, name_utf)] = extension
                del extensions[ns_name_tuple]
        _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
                              build_smart_strings)

    cdef _BaseContext _copy(self):
        # Copy the base context and share the extension element mapping.
        cdef _XSLTContext clone = <_XSLTContext>_BaseContext._copy(self)
        clone._extension_elements = self._extension_elements
        return clone

    cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
                          _Document doc):
        # Bind this context to a transformation context and register
        # functions and extension elements with it.
        self._xsltCtxt = xsltCtxt
        self._set_xpath_context(xsltCtxt.xpathCtxt)
        self._register_context(doc)
        self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
        self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
        _registerXSLTExtensions(xsltCtxt, self._extension_elements)

    cdef free_context(self):
        # Undo register_context() and free the transformation context.
        self._cleanup_context()
        self._release_context()
        if self._xsltCtxt is not NULL:
            xslt.xsltFreeTransformContext(self._xsltCtxt)
            self._xsltCtxt = NULL
        self._release_temp_refs()
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _XSLTQuotedStringParam:
    """Marker wrapper for literal XSLT string parameters whose quotes
    need escaping.  The value is kept UTF-8 encoded in ``strval``.
    """
    cdef bytes strval

    def __cinit__(self, strval):
        self.strval = _utf8(strval)
|
||||
|
||||
|
||||
@cython.no_gc_clear
cdef class XSLT:
    """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)

    Turn an XSL document into an XSLT object.

    Calling this object on a tree or Element will execute the XSLT::

        transform = etree.XSLT(xsl_tree)
        result = transform(xml_tree)

    Keyword arguments of the constructor:

    - extensions: a dict mapping ``(namespace, name)`` pairs to
      extension functions or extension elements
    - regexp: enable exslt regular expression support in XPath
      (default: True)
    - access_control: access restrictions for network or file
      system (see `XSLTAccessControl`)

    Keyword arguments of the XSLT call:

    - profile_run: enable XSLT profiling and make the profile available
      as XML document in ``result.xslt_profile`` (default: False)

    Other keyword arguments of the call are passed to the stylesheet
    as parameters.
    """
    cdef _XSLTContext _context
    cdef xslt.xsltStylesheet* _c_style
    cdef _XSLTResolverContext _xslt_resolver_context
    cdef XSLTAccessControl _access_control
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._c_style = NULL

    def __init__(self, xslt_input, *, extensions=None, regexp=True,
                 access_control=None):
        cdef xslt.xsltStylesheet* c_style = NULL
        cdef xmlDoc* c_doc
        cdef _Document doc
        cdef _Element root_node

        doc = _documentOrRaise(xslt_input)
        root_node = _rootNodeOrRaise(xslt_input)

        # set access control or raise TypeError
        self._access_control = access_control

        # make a copy of the document as stylesheet parsing modifies it
        c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)

        # make sure we always have a stylesheet URL
        if c_doc.URL is NULL:
            doc_url_utf = python.PyUnicode_AsASCIIString(
                f"string://__STRING__XSLT__/{id(self)}.xslt")
            c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))

        self._error_log = _ErrorLog()
        self._xslt_resolver_context = _XSLTResolverContext()
        _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
        # keep a copy in case we need to access the stylesheet via 'document()'
        self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
        c_doc._private = <python.PyObject*>self._xslt_resolver_context

        with self._error_log:
            orig_loader = _register_document_loader()
            c_style = xslt.xsltParseStylesheetDoc(c_doc)
            _reset_document_loader(orig_loader)

        if c_style is NULL or c_style.errors:
            tree.xmlFreeDoc(c_doc)
            if c_style is not NULL:
                # The stylesheet would free its document as well, but we
                # have just freed it ourselves => detach it first to
                # avoid a double free.
                c_style.doc = NULL
                xslt.xsltFreeStylesheet(c_style)
            self._xslt_resolver_context._raise_if_stored()
            # last error seems to be the most accurate here
            if self._error_log.last_error is not None and \
                    self._error_log.last_error.message:
                raise XSLTParseError(self._error_log.last_error.message,
                                     self._error_log)
            else:
                raise XSLTParseError(
                    self._error_log._buildExceptionMessage(
                        "Cannot parse stylesheet"),
                    self._error_log)

        c_doc._private = NULL  # no longer used!
        self._c_style = c_style
        self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)

    def __dealloc__(self):
        if self._xslt_resolver_context is not None and \
                self._xslt_resolver_context._c_style_doc is not NULL:
            tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
        # this cleans up the doc copy as well
        if self._c_style is not NULL:
            xslt.xsltFreeStylesheet(self._c_style)

    @property
    def error_log(self):
        """The log of errors and warnings of an XSLT execution."""
        return self._error_log.copy()

    @staticmethod
    def strparam(strval):
        """strparam(strval)

        Mark an XSLT string parameter that requires quote escaping
        before passing it into the transformation.  Use it like this::

            result = transform(doc, some_strval = XSLT.strparam(
                '''it's \"Monty Python's\" ...'''))

        Escaped string parameters can be reused without restriction.
        """
        return _XSLTQuotedStringParam(strval)

    @staticmethod
    def set_global_max_depth(int max_depth):
        """set_global_max_depth(max_depth)

        The maximum traversal depth that the stylesheet engine will allow.
        This does not only count the template recursion depth but also takes
        the number of variables/parameters into account.  The required setting
        for a run depends on both the stylesheet and the input data.

        Example::

            XSLT.set_global_max_depth(5000)

        Note that this is currently a global, module-wide setting because
        libxslt does not support it at a per-stylesheet level.
        """
        if max_depth < 0:
            raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
        xslt.xsltMaxDepth = max_depth

    def tostring(self, _ElementTree result_tree):
        """tostring(self, result_tree)

        Save result doc to string based on stylesheet output method.

        :deprecated: use str(result_tree) instead.
        """
        return str(result_tree)

    def __deepcopy__(self, memo):
        return self.__copy__()

    def __copy__(self):
        return _copyXSLT(self)

    def __call__(self, _input, *, profile_run=False, **kw):
        """__call__(self, _input, profile_run=False, **kw)

        Execute the XSL transformation on a tree or Element.

        Pass the ``profile_run`` option to get profile information
        about the XSLT.  The result of the XSLT will have a property
        xslt_profile that holds an XML tree with profiling data.

        Raises XSLTApplyError if the transformation did not produce a
        result, or re-raises an exception that was stored by a resolver
        or by the evaluation context during the run.
        """
        cdef _XSLTContext context = None
        cdef _XSLTResolverContext resolver_context
        cdef _Document input_doc
        cdef _Element root_node
        cdef _Document result_doc
        cdef _Document profile_doc = None
        cdef xmlDoc* c_profile_doc
        cdef xslt.xsltTransformContext* transform_ctxt
        cdef xmlDoc* c_result = NULL
        cdef xmlDoc* c_doc
        cdef tree.xmlDict* c_dict
        cdef const_char** params = NULL

        assert self._c_style is not NULL, "XSLT stylesheet not initialised"
        input_doc = _documentOrRaise(_input)
        root_node = _rootNodeOrRaise(_input)

        c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)

        transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
        if transform_ctxt is NULL:
            _destroyFakeDoc(input_doc._c_doc, c_doc)
            raise MemoryError()

        # using the stylesheet dict is safer than using a possibly
        # unrelated dict from the current thread.  Almost all
        # non-input tag/attr names will come from the stylesheet
        # anyway.
        if transform_ctxt.dict is not NULL:
            xmlparser.xmlDictFree(transform_ctxt.dict)
        if kw:
            # parameter values are stored in the dict
            # => avoid unnecessarily cluttering the global dict
            transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
            if transform_ctxt.dict is NULL:
                xslt.xsltFreeTransformContext(transform_ctxt)
                # the fake root document must be released on this path, too
                _destroyFakeDoc(input_doc._c_doc, c_doc)
                raise MemoryError()
        else:
            transform_ctxt.dict = self._c_style.doc.dict
            xmlparser.xmlDictReference(transform_ctxt.dict)

        xslt.xsltSetCtxtParseOptions(
            transform_ctxt, input_doc._parser._parse_options)

        if profile_run:
            transform_ctxt.profile = 1

        try:
            context = self._context._copy()
            context.register_context(transform_ctxt, input_doc)

            resolver_context = self._xslt_resolver_context._copy()
            transform_ctxt._private = <python.PyObject*>resolver_context

            _convert_xslt_parameters(transform_ctxt, kw, &params)
            try:
                c_result = self._run_transform(
                    c_doc, params, context, transform_ctxt)
            finally:
                if params is not NULL:
                    # deallocate space for parameters, also on errors
                    python.lxml_free(params)

            if transform_ctxt.state != xslt.XSLT_STATE_OK:
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL

            if transform_ctxt.profile:
                c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
                if c_profile_doc is not NULL:
                    profile_doc = _documentFactory(
                        c_profile_doc, input_doc._parser)
        finally:
            if context is not None:
                # also frees the transformation context
                context.free_context()
            else:
                # copying the context failed, so nothing else is going
                # to free the transformation context
                xslt.xsltFreeTransformContext(transform_ctxt)
            _destroyFakeDoc(input_doc._c_doc, c_doc)

        try:
            if resolver_context is not None and resolver_context._has_raised():
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL
                resolver_context._raise_if_stored()

            if context._exc._has_raised():
                if c_result is not NULL:
                    tree.xmlFreeDoc(c_result)
                    c_result = NULL
                context._exc._raise_if_stored()

            if c_result is NULL:
                # last error seems to be the most accurate here
                error = self._error_log.last_error
                if error is not None and error.message:
                    if error.line > 0:
                        message = f"{error.message}, line {error.line}"
                    else:
                        message = error.message
                elif error is not None and error.line > 0:
                    message = f"Error applying stylesheet, line {error.line}"
                else:
                    message = "Error applying stylesheet"
                raise XSLTApplyError(message, self._error_log)
        finally:
            if resolver_context is not None:
                resolver_context.clear()

        result_doc = _documentFactory(c_result, input_doc._parser)

        # Move the result document over to the thread dict and fix up all
        # names that still refer to one of the other dicts.  A reference to
        # the original result dict is held while the names are rewritten.
        c_dict = c_result.dict
        xmlparser.xmlDictReference(c_dict)
        __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
        if c_dict is not c_result.dict or \
                self._c_style.doc.dict is not c_result.dict or \
                input_doc._c_doc.dict is not c_result.dict:
            with nogil:
                if c_dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       c_dict, c_result.dict)
                if self._c_style.doc.dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       self._c_style.doc.dict, c_result.dict)
                if input_doc._c_doc.dict is not c_result.dict:
                    fixThreadDictNames(<xmlNode*>c_result,
                                       input_doc._c_doc.dict, c_result.dict)
        xmlparser.xmlDictFree(c_dict)

        return _xsltResultTreeFactory(result_doc, self, profile_doc)

    cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
                                const_char** params, _XSLTContext context,
                                xslt.xsltTransformContext* transform_ctxt):
        # Run the transformation without the GIL, with errors directed to
        # this stylesheet's error log and the access control (if any)
        # registered in the transformation context.
        cdef xmlDoc* c_result
        xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
                                       <xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
        if self._access_control is not None:
            self._access_control._register_in_context(transform_ctxt)
        with self._error_log, nogil:
            orig_loader = _register_document_loader()
            c_result = xslt.xsltApplyStylesheetUser(
                self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
            _reset_document_loader(orig_loader)
        return c_result
|
||||
|
||||
|
||||
cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
                              dict parameters, const_char*** params_ptr):
    # Convert the keyword arguments of a transformation call into the
    # NULL terminated array of alternating names and values that is
    # passed to the transformation.
    #
    # ``params_ptr[0]`` is set to NULL when there are no parameters,
    # otherwise to a newly allocated array that the caller must release
    # with ``python.lxml_free()``.  Quoted string parameters are
    # registered with the transformation context directly and do not
    # appear in the array.
    #
    # Raises MemoryError if the array or one of its strings cannot be
    # allocated; the array is freed before the exception propagates.
    cdef Py_ssize_t i, parameter_count
    cdef const_char** params
    cdef tree.xmlDict* c_dict = transform_ctxt.dict
    params_ptr[0] = NULL
    parameter_count = len(parameters)
    if parameter_count == 0:
        return
    # allocate space for parameters
    # * 2 as we want an entry for both key and value,
    # and + 1 as array is NULL terminated
    params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
    if not params:
        raise MemoryError()
    try:
        i = 0
        for key, value in parameters.iteritems():
            k = _utf8(key)
            if isinstance(value, _XSLTQuotedStringParam):
                v = (<_XSLTQuotedStringParam>value).strval
                xslt.xsltQuoteOneUserParam(
                    transform_ctxt, _xcstr(k), _xcstr(v))
            else:
                if isinstance(value, XPath):
                    v = (<XPath>value)._path
                else:
                    v = _utf8(value)
                # A NULL entry would terminate the array early and silently
                # drop all following parameters => fail loudly instead.
                params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(k), len(k))
                if params[i] is NULL:
                    raise MemoryError()
                i += 1
                params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(v), len(v))
                if params[i] is NULL:
                    raise MemoryError()
                i += 1
    except:
        python.lxml_free(params)
        raise
    params[i] = NULL
    params_ptr[0] = params
|
||||
|
||||
cdef XSLT _copyXSLT(XSLT stylesheet):
    # Create a new XSLT object from an initialised one.  The access
    # control object is shared, the error log is new, and the context,
    # the resolver context and both stylesheet documents are copied.
    cdef XSLT clone
    cdef xmlDoc* c_style_doc_copy
    assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
    clone = XSLT.__new__(XSLT)
    clone._access_control = stylesheet._access_control
    clone._error_log = _ErrorLog()
    clone._context = stylesheet._context._copy()

    # the copied resolver context needs its own copy of the document
    clone._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
    clone._xslt_resolver_context._c_style_doc = _copyDoc(
        stylesheet._xslt_resolver_context._c_style_doc, 1)

    # re-parse a copy of the stylesheet document for the new object
    c_style_doc_copy = _copyDoc(stylesheet._c_style.doc, 1)
    clone._c_style = xslt.xsltParseStylesheetDoc(c_style_doc_copy)
    if clone._c_style is NULL:
        tree.xmlFreeDoc(c_style_doc_copy)
        raise MemoryError()

    return clone
|
||||
|
||||
@cython.final
cdef class _XSLTResultTree(_ElementTree):
    """The result of an XSLT evaluation.

    Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
    and the ``.write_output()`` method to serialise to a file.
    """
    cdef XSLT _xslt
    cdef _Document _profile
    cdef xmlChar* _buffer
    cdef Py_ssize_t _buffer_len
    cdef Py_ssize_t _buffer_refcnt

    def write_output(self, file, *, compression=0):
        """write_output(self, file, *, compression=0)

        Serialise the XSLT output to a file or file-like object.

        As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
        the result as defined by the ``<xsl:output>`` tag.

        Raises XSLTSaveError if there is no document to serialise.
        """
        cdef _FilelikeWriter writer = None
        cdef _Document doc
        cdef int r, rclose, c_compression
        cdef const_xmlChar* c_encoding = NULL
        cdef tree.xmlOutputBuffer* c_buffer

        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                raise XSLTSaveError("No document to serialise")
        # normalise falsy values such as None to "no compression"
        c_compression = compression or 0
        xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
        writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
        if writer is None:
            # no Python level writer involved => safe to release the GIL
            with nogil:
                r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
                rclose = tree.xmlOutputBufferClose(c_buffer)
        else:
            r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
            rclose = tree.xmlOutputBufferClose(c_buffer)
        if writer is not None:
            writer._exc_context._raise_if_stored()
        if r < 0 or rclose == -1:
            python.PyErr_SetFromErrno(IOError)  # raises IOError

    cdef _saveToStringAndSize(self, xmlChar** s, int* l):
        # Serialise the result into a newly allocated string ``s[0]`` of
        # length ``l[0]``.  ``s[0]`` is set to NULL if there is no document.
        cdef _Document doc
        cdef int r
        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                s[0] = NULL
                return
        with nogil:
            r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
                                            self._xslt._c_style)
        if r == -1:
            raise MemoryError()

    def __str__(self):
        cdef xmlChar* encoding
        cdef xmlChar* s = NULL
        cdef int l = 0
        self._saveToStringAndSize(&s, &l)
        if s is NULL:
            return ''
        encoding = self._xslt._c_style.encoding
        try:
            if encoding is NULL:
                result = s[:l].decode('UTF-8')
            else:
                result = s[:l].decode(encoding)
        finally:
            tree.xmlFree(s)
        return _stripEncodingDeclaration(result)

    def __getbuffer__(self, Py_buffer* buffer, int flags):
        cdef int l = 0
        if buffer is NULL:
            return
        if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
            # writable requests always get a serialisation of their own,
            # the first read-only one is cached and shared afterwards
            self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
            buffer.len = l
            if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
                self._buffer = <xmlChar*>buffer.buf
                self._buffer_len = l
                self._buffer_refcnt = 1
        else:
            buffer.buf = self._buffer
            buffer.len = self._buffer_len
            self._buffer_refcnt += 1
        if flags & python.PyBUF_WRITABLE:
            buffer.readonly = 0
        else:
            buffer.readonly = 1
        if flags & python.PyBUF_FORMAT:
            buffer.format = "B"
        else:
            buffer.format = NULL
        buffer.ndim = 0
        buffer.shape = NULL
        buffer.strides = NULL
        buffer.suboffsets = NULL
        buffer.itemsize = 1
        buffer.internal = NULL
        if buffer.obj is not self:  # set by Cython?
            buffer.obj = self

    def __releasebuffer__(self, Py_buffer* buffer):
        if buffer is NULL:
            return
        if <xmlChar*>buffer.buf is self._buffer:
            # shared buffer: free it when the last user is gone
            self._buffer_refcnt -= 1
            if self._buffer_refcnt == 0:
                tree.xmlFree(<char*>self._buffer)
                self._buffer = NULL
        else:
            tree.xmlFree(<char*>buffer.buf)
        buffer.buf = NULL

    property xslt_profile:
        """Return an ElementTree with profiling data for the stylesheet run.
        """
        def __get__(self):
            cdef object root
            if self._profile is None:
                return None
            root = self._profile.getroot()
            if root is None:
                return None
            return ElementTree(root)

        def __del__(self):
            self._profile = None
|
||||
|
||||
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
    # Wrap a result document in an _XSLTResultTree that remembers the
    # stylesheet it came from and the (optional) profiling document.
    cdef _XSLTResultTree result_tree = \
        <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
    result_tree._profile = profile
    result_tree._xslt = xslt
    return result_tree
|
||||
|
||||
# Extension functions like "output" and "write" are a potential security
# risk.  We rely on the user to configure XSLTAccessControl as needed.
xslt.xsltRegisterAllExtras()

# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
|
||||
|
||||
|
||||
################################################################################
|
||||
# XSLT PI support
|
||||
|
||||
# Matches the 'href' pseudo-attribute of a processing instruction.  The
# value is captured in group 1 (single quoted) or group 2 (double quoted).
cdef object _RE_PI_HREF = re.compile(r'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
# XPath evaluator used by _findStylesheetByID(), created on first use
cdef XPath __findStylesheetByID = None
|
||||
|
||||
cdef _findStylesheetByID(_Document doc, id):
    # Evaluate an XPath search for xsl:stylesheet elements in ``doc``
    # whose xml:id equals ``id``.  The XPath evaluator is created on
    # first use and then reused.
    global __findStylesheetByID
    if __findStylesheetByID is None:
        xsl_namespaces = {"xsl" : "http://www.w3.org/1999/XSL/Transform"}
        __findStylesheetByID = XPath(
            "//xsl:stylesheet[@xml:id = $id]", namespaces=xsl_namespaces)
    return __findStylesheetByID(doc, id=id)
|
||||
|
||||
cdef class _XSLTProcessingInstruction(PIBase):
    def parseXSL(self, parser=None):
        """parseXSL(self, parser=None)

        Try to parse the stylesheet referenced by this PI and return
        an ElementTree for it.  If the stylesheet is embedded in the
        same document (referenced via xml:id), find and return an
        ElementTree for the stylesheet Element.

        The optional ``parser`` keyword argument can be passed to specify the
        parser used to read from external stylesheet URLs.

        Raises ValueError if the PI has no content, if it does not have
        exactly one 'href' pseudo-attribute, or if an embedded stylesheet
        reference is missing or ambiguous.
        """
        cdef _Document result_doc
        cdef _Element result_node
        cdef bytes href_utf
        cdef const_xmlChar* c_href
        cdef xmlChar* c_base
        cdef xmlAttr* c_attr
        _assertValidNode(self)
        if self._c_node.content is NULL:
            raise ValueError("PI lacks content")
        hrefs = _FIND_PI_HREF(' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
        if len(hrefs) != 1:
            raise ValueError("malformed PI attributes")
        hrefs = hrefs[0]
        # the value is in the first or second group, depending on the quotes
        href_utf = utf8(hrefs[0] or hrefs[1])
        c_href = _xcstr(href_utf)

        if c_href[0] != c'#':
            # normal URL, try to parse from it
            # The base URI is a newly allocated string that we must free.
            c_base = tree.xmlNodeGetBase(self._c_node.doc, self._c_node)
            c_href = tree.xmlBuildURI(c_href, c_base)
            if c_base is not NULL:
                tree.xmlFree(<char*>c_base)
            if c_href is not NULL:
                try:
                    href_utf = <unsigned char*>c_href
                finally:
                    tree.xmlFree(<char*>c_href)
            result_doc = _parseDocumentFromURL(href_utf, parser)
            return _elementTreeFactory(result_doc, None)

        # ID reference to embedded stylesheet
        # try XML:ID lookup
        _assertValidDoc(self._doc)
        c_href += 1  # skip leading '#'
        c_attr = tree.xmlGetID(self._c_node.doc, c_href)
        if c_attr is not NULL and c_attr.doc is self._c_node.doc:
            result_node = _elementFactory(self._doc, c_attr.parent)
            return _elementTreeFactory(result_node._doc, result_node)

        # try XPath search
        root = _findStylesheetByID(self._doc, funicode(c_href))
        if not root:
            raise ValueError("reference to non-existing embedded stylesheet")
        elif len(root) > 1:
            raise ValueError("ambiguous reference to embedded stylesheet")
        result_node = root[0]
        return _elementTreeFactory(result_node._doc, result_node)

    def set(self, key, value):
        """set(self, key, value)

        Supports setting the 'href' pseudo-attribute in the text of
        the processing instruction.

        Passing None as value removes an existing 'href'.  Raises
        AttributeError for any other key and ValueError if the value
        contains '"' or '>'.
        """
        if key != "href":
            raise AttributeError(
                "only setting the 'href' attribute is supported on XSLT-PIs")
        if value is None:
            attrib = ""
        elif '"' in value or '>' in value:
            raise ValueError("Invalid URL, must not contain '\"' or '>'")
        else:
            attrib = f' href="{value}"'
        text = ' ' + self.text
        if _FIND_PI_HREF(text):
            # The replacement is interpreted as a regex template, so
            # backslashes in the URL must be escaped to be taken literally.
            self.text = _REPLACE_PI_HREF(attrib.replace('\\', '\\\\'), text)
        else:
            self.text = text + attrib
|
Reference in New Issue
Block a user