Element Strip

Internal utilities of the `element_strip` module, used by `mixin_paragraph` and `Reference`.

Functions:

Name	Description
`strip_elements`	Remove the tags of specified sub-elements within a given element, preserving their inner children and text content.
`strip_tags`	Recursively remove a selection of tags from an element, preserving their inner content (children and text).

_strip_tags

_strip_tags(
    element: Element,
    strip: Iterable[str],
    protect: Iterable[str],
    protected: bool,
) -> tuple[Element | list[Element | str], bool]

Internal recursive helper for strip_tags.

Parameters:

Name	Type	Description	Default
`element`	`Element`	The current element to process.	required
`strip`	`Iterable[str]`	Tags to be stripped.	required
`protect`	`Iterable[str]`	Tags to be protected.	required
`protected`	`bool`	A flag indicating if the current element is under a protected parent.	required

Returns:

Type	Description
`tuple[Element \| list[Element \| str], bool]`	A tuple containing the processed element or list of elements, and a boolean indicating if any modification occurred.

Source code in odfdo/element_strip.py

def _strip_tags(
    element: Element,
    strip: Iterable[str],
    protect: Iterable[str],
    protected: bool,
) -> tuple[Element | list[Element | str], bool]:
    """Recursive worker behind `strip_tags`.

    The element is cloned, every child of the clone is processed
    recursively, and the outcome is either a flat list of content (when the
    element's own tag is stripped) or the original element rebuilt in place
    from the processed children.

    Args:
        element (Element): The element currently being processed.
        strip (Iterable[str]): Tag names to strip.
        protect (Iterable[str]): Tag names whose children must not be
            stripped.
        protected (bool): True when the parent of `element` has a tag listed
            in `protect`, in which case `element` itself is not stripped.

    Returns:
        tuple[Element | list[Element | str], bool]: The processed element,
            or the list of its text, children and tail if its tag was
            stripped, together with a flag telling whether anything changed.
    """
    element_clone = element.clone
    # Only the direct children inherit the shield; each level recomputes it.
    shield_children = bool(protect and element.tag in protect)

    any_change = False
    kept: list[Element | str] = []
    for node in element_clone.children:
        processed, node_changed = _strip_tags(node, strip, protect, shield_children)
        any_change = any_change or node_changed
        if isinstance(processed, list):
            # A stripped child is replaced by its own flattened content.
            kept.extend(cast(list[Element | str], processed))
        else:
            kept.append(processed)

    leading = element_clone.text
    trailing = element_clone.tail
    stripping = not protected and strip and element.tag in strip

    if not stripping and not any_change:
        # Nothing happened at or below this element: hand it back untouched.
        return (element, False)

    content: list[Element | str] = [leading] if leading else []
    content.extend(kept)

    if stripping:
        # The tag disappears; its text, children and tail take its place.
        if trailing is not None:
            content.append(trailing)
        return (content, True)

    # Some descendant changed: rebuild this element from the clone's data.
    element.clear()
    try:
        for name, val in element_clone.attributes.items():
            element.set_attribute(name, val)
    except ValueError:
        sys.stderr.write(f"strip_tags(): bad attribute in {element_clone}\n")
    for piece in content:
        element._Element__append(piece)
    if trailing is not None:
        element.tail = trailing
    return (element, True)

strip_elements

strip_elements(
    element: Element,
    sub_elements: Element | Iterable[Element],
) -> Element | list[Element | str]

Remove the tags of specified sub-elements within a given element, preserving their inner children and text content.

Warning: This function modifies the sub_elements in place and does not create clones.

Parameters:

Name	Type	Description	Default
`element`	`Element`	The parent element from which tags will be stripped.	required
`sub_elements`	`Element \| Iterable[Element]`	An element or a list of elements whose tags should be stripped.	required

Returns:

Type	Description
`Element \| list[Element \| str]`	The modified element. If the top-level element itself is stripped, it may return a list of its children and text content.

Source code in odfdo/element_strip.py

def strip_elements(
    element: Element,
    sub_elements: Element | Iterable[Element],
) -> Element | list[Element | str]:
    """Strip the tags of the given sub-elements, keeping their content.

    Each sub-element is renamed to a throw-away placeholder tag, then
    `strip_tags` is asked to strip that placeholder from `element`.

    Warning: the `sub_elements` are modified in place (their tag is
    overwritten); no clones are made. No check is made here that a
    sub-element actually belongs to `element`.

    Args:
        element: The parent element from which tags will be stripped.
        sub_elements: A single element, or an iterable of elements, whose
            tags should be stripped.

    Returns:
        Element | list[Element | str]: The modified element. If the top-level
            element itself is stripped, it may return a list of its children
            and text content.
    """
    if not sub_elements:
        return element

    targets = (sub_elements,) if isinstance(sub_elements, Element) else sub_elements

    placeholder = "text:this-will-be-removed"
    placeholder_tag = _get_lxml_tag(placeholder)
    for target in targets:
        target._xml_element.tag = placeholder_tag

    # default=None: never wrap the result in a fresh paragraph.
    return strip_tags(element, strip=(placeholder,), default=None)

strip_tags

strip_tags(
    element: Element,
    strip: Iterable[str] | None = None,
    protect: Iterable[str] | None = None,
    default: str | None = "text:p",
) -> Element | list[Element | str]

Recursively remove a selection of tags from an element, preserving their inner content (children and text).

This function is analogous to lxml.etree.strip_tags.

Parameters:

Name	Type	Description	Default
`element`	`Element`	The element to strip tags from.	required
`strip`	`Iterable[str] \| None`	A list of qualified ODF tag names (e.g., “text:span”) to remove. If `None`, no tags are stripped.	`None`
`protect`	`Iterable[str] \| None`	A list of qualified ODF tag names that shield their direct children: an element is not stripped when its parent's tag is listed here. The shield does not extend to deeper descendants.	`None`
`default`	`str \| None`	If the top-level `element` is stripped, its content (children and text) is wrapped in a new element with this tag name. If `None`, a list of `Element` and `str` objects is returned instead. Defaults to “text:p”.	`'text:p'`

Returns:

Type	Description
`Element \| list[Element \| str]`	The modified element. If the top-level `element` is stripped and `default` is `None`, a list of its children and text content is returned.

Source code in odfdo/element_strip.py

def strip_tags(
    element: Element,
    strip: Iterable[str] | None = None,
    protect: Iterable[str] | None = None,
    default: str | None = "text:p",
) -> Element | list[Element | str]:
    """Recursively remove a selection of tags from an element, preserving
    their inner content (children and text).

    This function is analogous to `lxml.etree.strip_tags`.

    Args:
        element: The element to strip tags from.
        strip: Qualified ODF tag names (e.g., "text:span") to remove. Any
            iterable is accepted, including generators and other one-shot
            iterators. If `None` or empty, nothing is stripped and
            `element` is returned as is.
        protect: Qualified ODF tag names that shield their direct children:
            an element is not stripped when its parent's tag is listed
            here. The shield does not extend to deeper descendants, and the
            top-level `element` is never shielded. Any iterable is
            accepted.
        default: If the top-level `element` is stripped, its content
            (children and text) is wrapped in a new element with this tag
            name. If `None` (or empty), a list of `Element` and `str`
            objects is returned instead. Defaults to "text:p".

    Returns:
        Element | list[Element | str]: The modified element. If the top-level
            `element` is stripped and `default` is `None`, a list of its
            children and text content is returned.
    """
    # The recursive helper tests `tag in strip` / `tag in protect` on every
    # node. A one-shot iterator (which is its own iterator) would be consumed
    # by the first test and match nothing afterwards, so materialise it once.
    # Real containers are passed through unchanged.
    if strip and iter(strip) is strip:
        strip = tuple(strip)
    if not strip:
        return element
    if not protect:
        protect = ()
    elif iter(protect) is protect:
        protect = tuple(protect)

    # The top-level element has no parent, hence it starts unprotected.
    result, modified = _strip_tags(element, strip, protect, False)
    if modified and isinstance(result, list) and default:
        # The top-level tag itself was stripped: re-wrap the loose content.
        wrapper: Element = Element.from_tag(default)
        for content in result:
            wrapper._Element__append(content)
        return wrapper
    return result