week06
This commit is contained in:
487
env/lib/python3.12/site-packages/lxml/xpath.pxi
vendored
Normal file
487
env/lib/python3.12/site-packages/lxml/xpath.pxi
vendored
Normal file
@ -0,0 +1,487 @@
|
||||
# XPath evaluation
|
||||
|
||||
class XPathSyntaxError(LxmlSyntaxError, XPathError):
    """Error raised for an XPath expression that could not be compiled.
    """
|
||||
|
||||
################################################################################
|
||||
# XPath
|
||||
|
||||
# Error codes used to select the log entries that describe a malformed
# XPath expression when building an exception message.
cdef object _XPATH_SYNTAX_ERRORS = (
    xmlerror.XML_XPATH_NUMBER_ERROR,
    xmlerror.XML_XPATH_UNFINISHED_LITERAL_ERROR,
    xmlerror.XML_XPATH_VARIABLE_REF_ERROR,
    xmlerror.XML_XPATH_INVALID_PREDICATE_ERROR,
    xmlerror.XML_XPATH_UNCLOSED_ERROR,
    xmlerror.XML_XPATH_INVALID_CHAR_ERROR,
)
|
||||
|
||||
# Error codes used to select the log entries that describe a failure while
# evaluating an XPath expression when building an exception message.
cdef object _XPATH_EVAL_ERRORS = (
    xmlerror.XML_XPATH_UNDEF_VARIABLE_ERROR,
    xmlerror.XML_XPATH_UNDEF_PREFIX_ERROR,
    xmlerror.XML_XPATH_UNKNOWN_FUNC_ERROR,
    xmlerror.XML_XPATH_INVALID_OPERAND,
    xmlerror.XML_XPATH_INVALID_TYPE,
    xmlerror.XML_XPATH_INVALID_ARITY,
    xmlerror.XML_XPATH_INVALID_CTXT_SIZE,
    xmlerror.XML_XPATH_INVALID_CTXT_POSITION,
)
|
||||
|
||||
cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Bind the shared callback '_xpath_function_call' to the function name
    # 'name_utf' in the given XPath context.  The namespace-aware libxml2
    # call is used whenever 'ns_utf' is not None.  The status value of the
    # libxml2 call is passed back to the caller unchanged.
    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
    if ns_utf is not None:
        return xpath.xmlXPathRegisterFuncNS(
            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), _xpath_function_call)
    return xpath.xmlXPathRegisterFunc(
        c_ctxt, _xcstr(name_utf), _xpath_function_call)
|
||||
|
||||
cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Counterpart of _register_xpath_function(): the same libxml2 calls are
    # made, but with a NULL callback for the given (optionally namespaced)
    # function name.  The status value of the libxml2 call is returned.
    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
    if ns_utf is not None:
        return xpath.xmlXPathRegisterFuncNS(
            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), NULL)
    return xpath.xmlXPathRegisterFunc(c_ctxt, _xcstr(name_utf), NULL)
|
||||
|
||||
|
||||
@cython.final
@cython.internal
cdef class _XPathContext(_BaseContext):
    # _BaseContext specialisation used by the XPath evaluators in this file.
    # In addition to what the base class keeps, it stores an optional dict
    # of XPath variables that is registered on each register_context() call.
    cdef object _variables

    def __init__(self, namespaces, extensions, error_log, enable_regexp, variables,
                 build_smart_strings):
        self._variables = variables
        _BaseContext.__init__(
            self, namespaces, extensions, error_log, enable_regexp,
            build_smart_strings)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Attach the libxml2 context and register the namespaces and
        # functions that are local to this context.
        self._set_xpath_context(xpathCtxt)
        # This would be a good place to set up the XPath parser dict, but
        # we cannot use the current thread dict as we do not know which
        # thread will execute the XPath evaluator - so, no dict for now.
        self.registerLocalNamespaces()
        self.registerLocalFunctions(xpathCtxt, _register_xpath_function)

    cdef register_context(self, _Document doc):
        # Per-evaluation setup: global namespaces and functions, the EXSLT
        # functions and finally the variables given at construction time.
        self._register_context(doc)
        self.registerGlobalNamespaces()
        self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
        self.registerExsltFunctions()
        variables = self._variables
        if variables is not None:
            self.registerVariables(variables)

    cdef unregister_context(self):
        # Per-evaluation teardown; also drops all registered variables.
        self.unregisterGlobalFunctions(
            self._xpathCtxt, _unregister_xpath_function)
        self.unregisterGlobalNamespaces()
        xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
        self._cleanup_context()

    cdef void registerExsltFunctions(self) noexcept:
        # With libxslt older than 1.1.25 we'd only execute dummy functions
        # anyway, so the namespace scan is skipped in that case.
        if xslt.LIBXSLT_VERSION >= 10125:
            tree.xmlHashScan(
                self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces,
                self._xpathCtxt)

    cdef registerVariables(self, variable_dict):
        # Register every name/value pair of the mapping as an XPath variable.
        for var_name, var_value in variable_dict.items():
            self.registerVariable(var_name, var_value)

    cdef registerVariable(self, name, value):
        # Wrap 'value' as an XPath object and bind it to the variable 'name'.
        utf_name = self._to_utf(name)
        xpath.xmlXPathRegisterVariable(
            self._xpathCtxt, _xcstr(utf_name), _wrapXPathObject(value, None, None))
|
||||
|
||||
|
||||
cdef void _registerExsltFunctionsForNamespaces(
        void* _c_href, void* _ctxt, const_xmlChar* c_prefix) noexcept:
    # Callback handed to tree.xmlHashScan() over the context's namespace
    # hash.  When the namespace URI is one of the four EXSLT namespaces
    # tested below, the matching libexslt function set is registered in the
    # XPath context under 'c_prefix'.  Any other URI is ignored.
    cdef const_xmlChar* c_href = <const_xmlChar*> _c_href
    cdef xpath.xmlXPathContext* ctxt = <xpath.xmlXPathContext*> _ctxt

    if tree.xmlStrcmp(c_href, xslt.EXSLT_DATE_NAMESPACE) == 0:
        xslt.exsltDateXpathCtxtRegister(ctxt, c_prefix)
        return
    if tree.xmlStrcmp(c_href, xslt.EXSLT_SETS_NAMESPACE) == 0:
        xslt.exsltSetsXpathCtxtRegister(ctxt, c_prefix)
        return
    if tree.xmlStrcmp(c_href, xslt.EXSLT_MATH_NAMESPACE) == 0:
        xslt.exsltMathXpathCtxtRegister(ctxt, c_prefix)
        return
    if tree.xmlStrcmp(c_href, xslt.EXSLT_STRINGS_NAMESPACE) == 0:
        xslt.exsltStrXpathCtxtRegister(ctxt, c_prefix)
|
||||
|
||||
|
||||
cdef class _XPathEvaluatorBase:
    # Shared base of the XPath evaluator classes.  It owns the libxml2 XPath
    # context (freed in __dealloc__), the _XPathContext wrapper, an error
    # log and, when threading is enabled, a lock that guards evaluation.
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _XPathContext _context
    cdef python.PyThread_type_lock _eval_lock
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._xpathCtxt = NULL
        if config.ENABLE_THREADING:
            self._eval_lock = python.PyThread_allocate_lock()
            if self._eval_lock is NULL:
                raise MemoryError()
        self._error_log = _ErrorLog()

    def __init__(self, namespaces, extensions, enable_regexp,
                 smart_strings):
        # No variables are passed at construction time (None); they are
        # registered per call by the subclasses.
        self._context = _XPathContext(
            namespaces, extensions, self._error_log,
            enable_regexp, None, smart_strings)

    @property
    def error_log(self):
        """A copy of the error log of this evaluator.
        """
        cdef _ErrorLog log = self._error_log
        assert log is not None, "XPath evaluator not initialised"
        return log.copy()

    def __dealloc__(self):
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathFreeContext(self._xpathCtxt)
        if config.ENABLE_THREADING and self._eval_lock is not NULL:
            python.PyThread_free_lock(self._eval_lock)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Remember the libxml2 context and hand it to the context wrapper.
        self._xpathCtxt = xpathCtxt
        self._context.set_context(xpathCtxt)

    cdef bint _checkAbsolutePath(self, char* path) noexcept:
        # True if the first character after leading spaces/tabs is '/'.
        # A NULL path is reported as not absolute.
        if path is NULL:
            return 0
        while path[0] == c' ' or path[0] == c'\t':
            path = path + 1
        return path[0] == c'/'

    @cython.final
    cdef int _lock(self) except -1:
        # Acquire the evaluation lock (blocking, with the GIL released).
        # Does nothing when threading is disabled or no lock exists.
        cdef int acquired
        if not config.ENABLE_THREADING or self._eval_lock == NULL:
            return 0
        with nogil:
            acquired = python.PyThread_acquire_lock(
                self._eval_lock, python.WAIT_LOCK)
        if acquired == 0:
            raise XPathError("XPath evaluator locking failed")
        return 0

    @cython.final
    cdef void _unlock(self) noexcept:
        # Release the evaluation lock taken by _lock(), if there is one.
        if config.ENABLE_THREADING and self._eval_lock != NULL:
            python.PyThread_release_lock(self._eval_lock)

    cdef _build_parse_error(self):
        # Build (not raise) an XPathSyntaxError.  The message comes from the
        # logged syntax errors if they yield one, otherwise from the whole
        # log with a generic default text.
        cdef _BaseErrorLog syntax_entries
        syntax_entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        message = None
        if syntax_entries:
            message = syntax_entries._buildExceptionMessage(None)
        if message is None:
            message = self._error_log._buildExceptionMessage(
                "Error in xpath expression")
        return XPathSyntaxError(message, self._error_log)

    cdef _build_eval_error(self):
        # Build (not raise) an XPathEvalError.  Evaluation errors in the log
        # are preferred, syntax errors are the second choice, and the whole
        # log with a generic default text is the last resort.
        cdef _BaseErrorLog selected
        selected = self._error_log.filter_types(_XPATH_EVAL_ERRORS)
        if not selected:
            selected = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        message = None
        if selected:
            message = selected._buildExceptionMessage(None)
        if message is None:
            message = self._error_log._buildExceptionMessage(
                "Error in xpath expression")
        return XPathEvalError(message, self._error_log)

    cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc):
        # Convert the raw libxml2 result into a Python object.  The XPath
        # object is always freed here and the temporary references of the
        # context are released on every path.
        cdef _XPathContext context = self._context

        if context._exc._has_raised():
            # An exception was stored in the context during evaluation:
            # drop the result and re-raise it.
            if xpathObj is not NULL:
                _freeXPathObject(xpathObj)
                xpathObj = NULL
            context._release_temp_refs()
            context._exc._raise_if_stored()

        if xpathObj is NULL:
            # No result and nothing re-raised above: report the error log.
            context._release_temp_refs()
            raise self._build_eval_error()

        try:
            return _unwrapXPathObject(xpathObj, doc, context)
        finally:
            _freeXPathObject(xpathObj)
            context._release_temp_refs()
|
||||
|
||||
|
||||
cdef class XPathElementEvaluator(_XPathEvaluatorBase):
    """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an element.

    Absolute XPath expressions (starting with '/') will be evaluated against
    the ElementTree as returned by getroottree().

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    cdef _Element _element

    def __init__(self, _Element element not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* xpathCtxt
        cdef _Document doc
        _assertValidNode(element)
        _assertValidDoc(element._doc)
        self._element = element
        doc = element._doc
        _XPathEvaluatorBase.__init__(self, namespaces, extensions,
                                     regexp, smart_strings)
        # The libxml2 context is created for the element's document and is
        # owned by this evaluator from set_context() on.
        xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
        if xpathCtxt is NULL:
            raise MemoryError()
        self.set_context(xpathCtxt)

    def register_namespace(self, prefix, uri):
        """Register a namespace with the XPath context.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        self._context.addNamespace(prefix, uri)

    def register_namespaces(self, namespaces):
        """Register a prefix -> uri dict.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        for prefix, uri in namespaces.items():
            self._context.addNamespace(prefix, uri)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.

        Absolute XPath expressions (starting with '/') will be evaluated
        against the ElementTree as returned by getroottree().
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path = _utf8(_path)
        doc = self._element._doc

        self._lock()
        # The element this evaluator was created for is the context node.
        self._xpathCtxt.node = self._element._c_node
        try:
            self._context.register_context(doc)
            self._context.registerVariables(_variables)
            c_path = _xcstr(path)
            # The evaluation itself runs with the GIL released.
            with nogil:
                xpathObj = xpath.xmlXPathEvalExpression(
                    c_path, self._xpathCtxt)
            result = self._handle_result(xpathObj, doc)
        finally:
            # Always undo the per-call registration before unlocking.
            self._context.unregister_context()
            self._unlock()

        return result
|
||||
|
||||
|
||||
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
    """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an ElementTree.

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, _ElementTree etree not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        # The tree's context node takes the role of the evaluator's element.
        XPathElementEvaluator.__init__(
            self, etree._context_node, namespaces=namespaces,
            extensions=extensions, regexp=regexp,
            smart_strings=smart_strings)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef xmlDoc* c_doc
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path = _utf8(_path)
        doc = self._element._doc

        self._lock()
        try:
            self._context.register_context(doc)
            try:
                # Creating the fake root document is inside this 'try' so
                # that the context is unregistered even if it fails, as the
                # other evaluators in this file always do in their 'finally'.
                c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
                try:
                    self._context.registerVariables(_variables)
                    c_path = _xcstr(path)
                    # The evaluation runs with the GIL released, against
                    # the fake document and its root element.
                    with nogil:
                        self._xpathCtxt.doc = c_doc
                        self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
                        xpathObj = xpath.xmlXPathEvalExpression(
                            c_path, self._xpathCtxt)
                    result = self._handle_result(xpathObj, doc)
                finally:
                    _destroyFakeDoc(doc._c_doc, c_doc)
            finally:
                self._context.unregister_context()
        finally:
            self._unlock()

        return result
|
||||
|
||||
|
||||
def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
                   regexp=True, smart_strings=True):
    """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True, smart_strings=True)

    Creates an XPath evaluator for an ElementTree or an Element.

    An ElementTree argument yields an XPathDocumentEvaluator, anything else
    is passed to XPathElementEvaluator.  The resulting object can be called
    with an XPath expression as argument and XPath variables provided as
    keyword arguments.

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    if isinstance(etree_or_element, _ElementTree):
        evaluator_class = XPathDocumentEvaluator
    else:
        evaluator_class = XPathElementEvaluator
    return evaluator_class(
        etree_or_element, namespaces=namespaces,
        extensions=extensions, regexp=regexp, smart_strings=smart_strings)
|
||||
|
||||
|
||||
cdef class XPath(_XPathEvaluatorBase):
    """XPath(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    A compiled XPath expression that can be called on Elements and ElementTrees.

    Besides the XPath expression, you can pass prefix-namespace
    mappings and extension functions to the constructor through the
    keyword arguments ``namespaces`` and ``extensions``.  EXSLT
    regular expression support can be disabled with the 'regexp'
    boolean keyword (defaults to True).  Smart strings will be
    returned for string results unless you pass
    ``smart_strings=False``.
    """
    cdef xpath.xmlXPathCompExpr* _xpath
    cdef bytes _path

    def __cinit__(self):
        self._xpath = NULL

    def __init__(self, path, *, namespaces=None, extensions=None,
                 regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* c_ctxt
        _XPathEvaluatorBase.__init__(
            self, namespaces, extensions, regexp, smart_strings)
        self._path = _utf8(path)
        # The context is created without a document; the document and the
        # context node are filled in on each call.
        c_ctxt = xpath.xmlXPathNewContext(NULL)
        if c_ctxt is NULL:
            raise MemoryError()
        self.set_context(c_ctxt)
        self._xpath = xpath.xmlXPathCtxtCompile(c_ctxt, _xcstr(self._path))
        if self._xpath is NULL:
            raise self._build_parse_error()

    def __call__(self, _etree_or_element, **_variables):
        """__call__(self, _etree_or_element, **_variables)

        Evaluate the compiled expression on an Element or ElementTree.
        XPath variables may be provided as keyword arguments.
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef _Document doc
        cdef _Element root

        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        doc = _documentOrRaise(_etree_or_element)
        root = _rootNodeOrRaise(_etree_or_element)

        self._lock()
        self._xpathCtxt.doc = doc._c_doc
        self._xpathCtxt.node = root._c_node

        try:
            self._context.register_context(doc)
            self._context.registerVariables(_variables)
            # The evaluation itself runs with the GIL released.
            with nogil:
                xpathObj = xpath.xmlXPathCompiledEval(
                    self._xpath, self._xpathCtxt)
            return self._handle_result(xpathObj, doc)
        finally:
            # Always undo the per-call registration before unlocking.
            self._context.unregister_context()
            self._unlock()

    @property
    def path(self):
        """The literal XPath expression.
        """
        return self._path.decode('UTF-8')

    def __dealloc__(self):
        if self._xpath is not NULL:
            xpath.xmlXPathFreeCompExpr(self._xpath)

    def __repr__(self):
        # The representation is the expression text itself.
        return self.path
|
||||
|
||||
|
||||
# NOTE: the two helpers below are no longer used by ETXPath; they are kept
# in case other code in the module still refers to them.
cdef object _replace_strings = re.compile(b'("[^"]*")|(\'[^\']*\')').sub
cdef object _find_namespaces = re.compile(b'({[^}]+})').findall
# Finds, from left to right, either a complete string literal (group 1) or a
# '{namespace}' definition (group 2).  The string alternative is tried first
# at each position, so braces inside a literal never match as a namespace.
cdef object _iter_strings_or_namespaces = re.compile(
    b'("[^"]*"|\'[^\']*\')|({[^}]+})').finditer


cdef class ETXPath(XPath):
    """ETXPath(self, path, extensions=None, regexp=True, smart_strings=True)
    Special XPath class that supports the ElementTree {uri} notation for namespaces.

    Note that this class does not accept the ``namespaces`` keyword
    argument. All namespaces must be passed as part of the path
    string.  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, path, *, extensions=None, regexp=True,
                 smart_strings=True):
        # Turn the {uri} notation into generated prefixes plus a matching
        # prefix -> uri mapping, then compile as a normal XPath.
        path, namespaces = self._nsextract_path(path)
        XPath.__init__(self, path, namespaces=namespaces,
                       extensions=extensions, regexp=regexp,
                       smart_strings=smart_strings)

    cdef _nsextract_path(self, path):
        # Replace each {namespace} outside of string literals by a generated
        # prefix ('__xpp01:', '__xpp02:', ...).  Equal namespaces share one
        # prefix.  String literals are copied verbatim, so a literal that
        # contains '{...}' keeps its content.
        # Returns the tuple (rewritten path as str, {prefix: namespace}).
        cdef dict namespaces = {}
        cdef dict prefix_by_def = {}
        cdef list pieces = []
        cdef Py_ssize_t copied_to = 0
        cdef int i = 1
        path_utf = _utf8(path)
        for hit in _iter_strings_or_namespaces(path_utf):
            namespace_def = hit.group(2)
            if namespace_def is None:
                # A string literal: it is copied unchanged as part of the
                # next verbatim slice.
                continue
            prefix_str = prefix_by_def.get(namespace_def)
            if prefix_str is None:
                prefix = python.PyBytes_FromFormat("__xpp%02d", i)
                i += 1
                namespace = namespace_def[1:-1]  # remove '{}'
                namespace = (<bytes>namespace).decode('utf8')
                namespaces[prefix.decode('utf8')] = namespace
                prefix_str = prefix + b':'
                prefix_by_def[namespace_def] = prefix_str
            pieces.append(path_utf[copied_to:hit.start()])
            pieces.append(prefix_str)
            copied_to = hit.end()
        pieces.append(path_utf[copied_to:])
        path = b''.join(pieces).decode('utf8')
        return path, namespaces
|
Reference in New Issue
Block a user