week06
This commit is contained in:
215
env/lib/python3.12/site-packages/lxml/cleanup.pxi
vendored
Normal file
215
env/lib/python3.12/site-packages/lxml/cleanup.pxi
vendored
Normal file
@ -0,0 +1,215 @@
|
||||
# functions for tree cleanup and removing elements from subtrees
|
||||
|
||||
def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
    """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)

    Drop every namespace declaration in a subtree that none of the
    elements or attributes of that subtree makes use of.

    'top_nsmap', when given, must map prefixes to namespace URIs.
    Those namespaces get declared on the top element of the subtree
    before the cleanup runs, which makes it possible to move namespace
    declarations up to the top of the tree.

    'keep_ns_prefixes', when given, must be a list of prefixes.  The
    cleanup leaves these prefixes in place.
    """
    root = _rootNodeOrRaise(tree_or_element)
    c_root = root._c_node

    if top_nsmap:
        owner_doc = root._doc
        # Declare the requested namespaces on the top element first, then
        # apply them to the subtree below it.
        _setNodeNamespaces(c_root, owner_doc, None, top_nsmap)
        moveNodeToDocument(owner_doc, c_root.doc, c_root)

    # Prefixes are normalised through _utf8() before being handed on;
    # an empty or missing list means "nothing to protect" (None).
    if keep_ns_prefixes:
        protected_prefixes = {_utf8(prefix) for prefix in keep_ns_prefixes}
    else:
        protected_prefixes = None

    _removeUnusedNamespaceDeclarations(c_root, protected_prefixes)
|
||||
|
||||
|
||||
def strip_attributes(tree_or_element, *attribute_names):
    """strip_attributes(tree_or_element, *attribute_names)

    Remove every attribute whose name is among the given attribute
    names, on an Element (or ElementTree) and on all its descendants.

    Attribute names can contain wildcards as in `_Element.iter`.

    Example usage::

        strip_attributes(root_element,
                         'simpleattr',
                         '{http://some/ns}attrname',
                         '{http://other/ns}*')
    """
    cdef _MultiTagMatcher attr_matcher
    # The input is validated before anything else, so an invalid
    # argument raises even when no attribute names were passed.
    root = _rootNodeOrRaise(tree_or_element)

    if attribute_names:
        attr_matcher = _MultiTagMatcher.__new__(
            _MultiTagMatcher, attribute_names)
        attr_matcher.cacheTags(root._doc)
        # Skip the tree walk entirely when the matcher reports that it
        # cannot match any attribute.
        if not attr_matcher.rejectsAllAttributes():
            _strip_attributes(root._c_node, attr_matcher)
|
||||
|
||||
|
||||
cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
    # Walk the nodes visited by the traversal started at c_node and remove
    # every attribute that the matcher accepts.
    # NOTE(review): the BEGIN/END_FOR_EACH_ELEMENT_FROM macros are defined
    # outside this file; they appear to rebind c_node to each visited
    # node -- confirm against their definition.
    cdef xmlAttr* c_current_attr
    cdef xmlAttr* c_following_attr
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_current_attr = c_node.properties
        while c_current_attr is not NULL:
            # Remember the successor before a possible removal, since the
            # current attribute must not be touched after xmlRemoveProp().
            c_following_attr = c_current_attr.next
            if matcher.matchesAttribute(c_current_attr):
                tree.xmlRemoveProp(c_current_attr)
            c_current_attr = c_following_attr
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
    """strip_elements(tree_or_element, *tag_names, with_tail=True)

    Remove all elements carrying one of the given tag names from a tree
    or subtree.  Each matching element goes away together with its whole
    subtree: attributes, text content and descendants.  The tail text of
    the element is removed as well, unless the ``with_tail`` keyword
    argument is explicitly set to False.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that the element you pass in (or the root element of the
    ElementTree) is never deleted, even if it matches.  Only its
    descendants are treated.  To cover the root element too, check its
    tag name yourself before calling this function.

    Example usage::

        strip_elements(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            lxml.etree.Comment           # comments
            )
    """
    cdef _MultiTagMatcher tag_matcher
    # Both lookups run before the empty-names check, so invalid input
    # raises even when there is nothing to strip.
    owner_doc = _documentOrRaise(tree_or_element)
    root = _rootNodeOrRaise(tree_or_element)

    if tag_names:
        tag_matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
        tag_matcher.cacheTags(owner_doc)
        if not tag_matcher.rejectsAll():
            if isinstance(tree_or_element, _ElementTree):
                # For a whole tree, comments and PIs that sit next to the
                # root node are covered as well.
                if tag_matcher.matchesType(tree.XML_COMMENT_NODE):
                    _removeSiblings(
                        root._c_node, tree.XML_COMMENT_NODE, with_tail)
                if tag_matcher.matchesType(tree.XML_PI_NODE):
                    _removeSiblings(
                        root._c_node, tree.XML_PI_NODE, with_tail)
            _strip_elements(owner_doc, root._c_node, tag_matcher, with_tail)
|
||||
|
||||
cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
                     bint with_tail):
    # Remove every matching child (with its subtree) below the nodes
    # visited by the traversal started at c_node.
    cdef xmlNode* c_candidate
    cdef xmlNode* c_successor
    cdef bint is_match

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # Only the children of the current node are inspected and removed.
        # Unlinking c_node itself would cause problems for the running
        # tree iteration.
        c_candidate = _findChildForwards(c_node, 0)
        while c_candidate is not NULL:
            # Pick the successor up front; the candidate may be gone
            # from the tree by the end of this round.
            c_successor = _nextElement(c_candidate)
            is_match = matcher.matches(c_candidate)
            if is_match:
                if c_candidate.type == tree.XML_ELEMENT_NODE:
                    # NOTE(review): unlinking first presumably keeps
                    # _removeNode() from taking the tail text along --
                    # confirm against _removeNode().
                    if not with_tail:
                        tree.xmlUnlinkNode(c_candidate)
                    _removeNode(doc, c_candidate)
                else:
                    # Matching non-element nodes are unlinked directly;
                    # the text following them is dropped on request.
                    if with_tail:
                        _removeText(c_candidate.next)
                    tree.xmlUnlinkNode(c_candidate)
                    attemptDeallocation(c_candidate)
            c_candidate = c_successor
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_tags(tree_or_element, *tag_names):
    """strip_tags(tree_or_element, *tag_names)

    Remove all elements carrying one of the given tag names from a tree
    or subtree.  The elements and their attributes go away, but *not*
    their text/tail content or descendants.  Instead, the text content
    and the children of each removed element are merged into its parent.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that the element you pass in (or the root element of the
    ElementTree) is never deleted, even if it matches.  Only its
    descendants are treated.

    Example usage::

        strip_tags(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            Comment                      # comments (including their text!)
            )
    """
    cdef _MultiTagMatcher tag_matcher
    # Both lookups run before the empty-names check, so invalid input
    # raises even when there is nothing to strip.
    owner_doc = _documentOrRaise(tree_or_element)
    root = _rootNodeOrRaise(tree_or_element)

    if tag_names:
        tag_matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
        tag_matcher.cacheTags(owner_doc)
        if not tag_matcher.rejectsAll():
            if isinstance(tree_or_element, _ElementTree):
                # For a whole tree, comments and PIs that sit next to the
                # root node are covered as well (called with with_tail=0).
                if tag_matcher.matchesType(tree.XML_COMMENT_NODE):
                    _removeSiblings(root._c_node, tree.XML_COMMENT_NODE, 0)
                if tag_matcher.matchesType(tree.XML_PI_NODE):
                    _removeSiblings(root._c_node, tree.XML_PI_NODE, 0)
            _strip_tags(owner_doc, root._c_node, tag_matcher)
|
||||
|
||||
cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
    # Replace every matching child element by its own children, and unlink
    # matching non-element children, below the nodes visited by the
    # traversal started at c_node.
    cdef xmlNode* c_candidate
    cdef xmlNode* c_successor

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # Only the children of the current node are inspected and removed.
        # Unlinking c_node itself would cause problems for the running
        # tree iteration.
        c_candidate = _findChildForwards(c_node, 0)
        while c_candidate is not NULL:
            if not matcher.matches(c_candidate):
                c_successor = _nextElement(c_candidate)
            elif c_candidate.type == tree.XML_ELEMENT_NODE:
                # The children of the stripped element take its place, so
                # they are next in line; fall back to the following
                # sibling when there are none.
                c_successor = _findChildForwards(c_candidate, 0)
                if c_successor is NULL:
                    c_successor = _nextElement(c_candidate)
                _replaceNodeByChildren(doc, c_candidate)
                # c_candidate is only inspected further when the
                # deallocation attempt reports that it did not happen.
                if (not attemptDeallocation(c_candidate)
                        and c_candidate.nsDef is not NULL):
                    # make namespaces absolute
                    moveNodeToDocument(doc, doc._c_doc, c_candidate)
            else:
                # Matching non-element node: fetch the successor before
                # taking the node out of the tree.
                c_successor = _nextElement(c_candidate)
                tree.xmlUnlinkNode(c_candidate)
                attemptDeallocation(c_candidate)
            c_candidate = c_successor
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
Reference in New Issue
Block a user