Skip to content

Xmlpart

Provides the XmlPart base class for handling XML parts within ODF documents.

This module abstracts the interaction with XML content, allowing for easy access, manipulation, and serialization of specific XML parts of an ODF file.

Classes:

Name Description
XmlPart

Represents an XML part within an ODF document.

XmlPart

Represents an XML part within an ODF document.

This class provides an abstraction layer over the underlying XML library (lxml), allowing for easier manipulation of XML content within ODF parts (e.g., ‘content.xml’, ‘styles.xml’).

Attributes:

Name Type Description
part_name str

The name of the XML part (e.g., “content.xml”).

container Container

The ODF container associated with this XML part.

Methods:

Name Description
__init__

Initializes an XmlPart instance.

__repr__

Returns a short textual representation showing the class name and the part name.

custom_pretty_tree

Returns a pretty-printed version of the XML tree.

delete_element

Deletes a specified child element from the XML tree.

get_element

Returns the first element matching the XPath query.

get_elements

Returns a list of elements matching the XPath query.

pretty_serialize

Serializes the XML part to bytes with pretty-printing.

serialize

Serializes the XML part to bytes.

xpath

Applies an XPath query to the root of the XML part and its subtree.

Source code in odfdo/xmlpart.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
class XmlPart:
    """A single XML member of an ODF document, parsed on demand.

    The class hides the underlying XML library (lxml) behind a small API for
    querying, editing and serializing the content of parts such as
    'content.xml' or 'styles.xml'.

    Attributes:
        part_name (str): Name of the part inside the container
            (e.g., "content.xml").
        container (Container): The ODF container this part is read from.
    """

    def __init__(self, part_name: str, container: Container) -> None:
        """Binds the part to its name and container without parsing anything.

        Args:
            part_name: Name of the XML part (e.g., "content.xml").
            container: The ODF container (zip file) holding the part.
        """
        self.part_name, self.container = part_name, container

        # Caches filled lazily by _get_tree() and by the `root` property.
        self.__tree: _ElementTree | None = None
        self.__root: Element | None = None

    def _get_tree(self) -> _ElementTree:
        """Returns the parsed tree of the part, parsing it on first use.

        On the first call the raw part is fetched from the container with
        ``get_part`` and parsed; the result is cached for later calls.

        Returns:
            _ElementTree: The cached parse result.
        """
        cached = self.__tree
        if cached is not None:
            return cached
        raw_part = self.container.get_part(self.part_name)
        self.__tree = parse(BytesIO(raw_part))  # type: ignore[arg-type]
        return self.__tree

    def __repr__(self) -> str:
        """Returns a debug string made of the class name and the part name."""
        class_name = self.__class__.__name__
        return f"<{class_name} part_name={self.part_name}>"

    # Public API

    @property
    def root(self) -> Element:
        """The root Element of this XML part.

        The first access parses the part (through ``_get_tree``) and wraps
        the tree's root with ``Element.from_tag``; later accesses return the
        cached wrapper.
        """
        if self.__root is not None:
            return self.__root
        tree_root = self._get_tree().getroot()
        self.__root = Element.from_tag(tree_root)
        return self.__root

    def _get_body(self) -> Body:
        """Fetches the body element exposed by the root of this part.

        Returns:
            Body: The value of the root's ``document_body`` attribute.

        Raises:
            TypeError: If that value is not an Element, i.e. the part has
                no 'office:body'.
        """
        candidate = self.root.document_body
        if isinstance(candidate, Element):
            return candidate
        raise TypeError(f"No body found in {self.part_name!r}")

    @property
    def body(self) -> Body | OfficeSettings:
        """The document body element ('office:body') of this part.

        Reading the property delegates to ``_get_body``, so a TypeError is
        raised when the part has no body.
        """
        return self._get_body()

    @body.setter
    def body(self, new_body: Element) -> None:
        """Replaces the content of the current body.

        The existing body element is kept: it is cleared, refilled with the
        children of ``new_body``, and its previous tail text is restored.

        Args:
            new_body: Element whose children become the new body content.
        """
        current = self._get_body()
        saved_tail = current.tail
        current.clear()
        for child in new_body.children:
            current.append(child)
        if saved_tail:  # pragma: nocover
            current.tail = saved_tail

    def get_elements(self, xpath_query: str) -> list[Element]:
        """Collects every element matching an XPath query.

        The query is evaluated from the root of this XML part.

        Args:
            xpath_query: The XPath query string.

        Returns:
            list[Element]: Whatever the root's ``get_elements`` returns for
                the query.
        """
        matches = self.root.get_elements(xpath_query)
        return matches

    def get_element(self, xpath_query: str) -> Element | None:
        """Looks up the first element matching an XPath query.

        The query is evaluated from the root of this XML part.

        Args:
            xpath_query: The XPath query string.

        Returns:
            Element | None: The first match, or None when nothing matches.
        """
        first_match = self.root.get_element(xpath_query)
        return first_match

    def delete_element(self, child: Element) -> None:
        """Removes an element by calling its own ``delete`` method.

        Args:
            child: The element to delete.
        """
        child.delete()

    def xpath(self, xpath_query: str) -> list[Element | EText]:
        """Runs an XPath query against the root of the part and its subtree.

        Args:
            xpath_query: The XPath query string.

        Returns:
            list[Element | EText]: The Element or EText results produced by
                the root's ``xpath`` method.
        """
        results = self.root.xpath(xpath_query)
        return results

    @property
    def clone(self) -> XmlPart:
        """Builds a copy of this part without calling ``__init__``.

        Each instance attribute is transferred to the new object: the
        container is replaced by its own ``clone``, the cached tree is reset
        to None (it is re-parsed on demand), and every other attribute is
        deep-copied.

        NOTE(review): when the root has already been loaded it is
        deep-copied while the tree cache is reset, so the clone's ``root``
        and the tree later re-parsed by ``_get_tree`` may be unrelated
        objects -- confirm this is intended.

        Returns:
            XmlPart: A new instance of the same class as the original.
        """
        duplicate = object.__new__(self.__class__)
        for attr_name in self.__dict__:
            if attr_name == "container":
                copied = self.container.clone
            elif attr_name == "_XmlPart__tree":
                copied = None
            else:
                copied = deepcopy(getattr(self, attr_name))
            setattr(duplicate, attr_name, copied)
        return duplicate

    def serialize(self, pretty: bool = False) -> bytes:
        """Serializes the XML part to UTF-8 bytes.

        Args:
            pretty: When True, the work is handed to ``pretty_serialize``.
                Defaults to False.

        Returns:
            bytes: The XML declaration followed by the serialized tree.
        """
        if pretty:
            return self.pretty_serialize()
        declaration = b'<?xml version="1.0" encoding="UTF-8"?>\n'
        markup = tostring(self._get_tree(), encoding="unicode")
        return declaration + markup.encode("utf8")  # type: ignore[no-any-return]

    def pretty_serialize(self) -> bytes:
        """Serializes the indented form of the XML part to UTF-8 bytes.

        Returns:
            bytes: The XML declaration followed by the serialized result of
                ``custom_pretty_tree``.
        """
        declaration = b'<?xml version="1.0" encoding="UTF-8"?>\n'
        indented = self.custom_pretty_tree()
        markup = tostring(indented, encoding="unicode")
        return declaration + markup.encode("utf8")  # type: ignore[no-any-return]

    def custom_pretty_tree(self) -> _ElementTree | _Element:
        """Applies the custom indentation helper to the tree of the part.

        The root of the parsed tree is passed to ``pretty_indent`` and the
        helper's result is returned unchanged.

        Returns:
            _ElementTree | _Element: The value returned by ``pretty_indent``.
        """
        return pretty_indent(self._get_tree().getroot())

__root instance-attribute

__root: Element | None = None

__tree instance-attribute

__tree: _ElementTree | None = None

body property writable

body: Body | OfficeSettings

The document body element (‘office:body’).

This property provides access to the main content body of the XML part.

clone property

clone: XmlPart

Creates a deep copy of the XmlPart instance.

The cloned part will have its own independent XML tree.

Returns:

Name Type Description
XmlPart XmlPart

A new XmlPart instance that is a clone of the original.

container instance-attribute

container = container

part_name instance-attribute

part_name = part_name

root property

root: Element

The root Element of this XML part.

When accessed for the first time, it loads and parses the XML part into an Element object.

__init__

__init__(part_name: str, container: Container) -> None

Initializes an XmlPart instance.

Parameters:

Name Type Description Default
part_name str

The name of the XML part (e.g., “content.xml”).

required
container Container

The ODF container (zip file) that holds this XML part.

required
Source code in odfdo/xmlpart.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(self, part_name: str, container: Container) -> None:
    """Binds the part to its name and container without parsing anything.

    Args:
        part_name: Name of the XML part (e.g., "content.xml").
        container: The ODF container (zip file) holding the part.
    """
    self.part_name, self.container = part_name, container

    # Caches that start empty; nothing is parsed at construction time.
    self.__tree: _ElementTree | None = None
    self.__root: Element | None = None

__repr__

__repr__() -> str
Source code in odfdo/xmlpart.py
85
86
def __repr__(self) -> str:
    """Returns a debug string made of the class name and the part name."""
    class_name = self.__class__.__name__
    return f"<{class_name} part_name={self.part_name}>"

_get_body

_get_body() -> Body

Retrieves the document body (‘office:body’) from the root element.

Returns:

Name Type Description
Body Body

The document body element.

Raises:

Type Description
TypeError

If no ‘office:body’ element is found in the part.

Source code in odfdo/xmlpart.py
102
103
104
105
106
107
108
109
110
111
112
113
114
def _get_body(self) -> Body:
    """Fetches the body element exposed by the root of this part.

    Returns:
        Body: The value of the root's ``document_body`` attribute.

    Raises:
        TypeError: If that value is not an Element, i.e. the part has no
            'office:body'.
    """
    candidate = self.root.document_body
    if isinstance(candidate, Element):
        return candidate
    raise TypeError(f"No body found in {self.part_name!r}")

_get_tree

_get_tree() -> _ElementTree

Loads and returns the XML tree for the part.

If the tree has not been loaded yet, it reads the part from the container and parses it.

Returns:

Name Type Description
_ElementTree _ElementTree

The parsed XML ElementTree object.

Source code in odfdo/xmlpart.py
71
72
73
74
75
76
77
78
79
80
81
82
83
def _get_tree(self) -> _ElementTree:
    """Returns the parsed tree of the part, parsing it on first use.

    On the first call the raw part is fetched from the container with
    ``get_part`` and parsed; the result is cached for later calls.

    Returns:
        _ElementTree: The cached parse result.
    """
    cached = self.__tree
    if cached is not None:
        return cached
    raw_part = self.container.get_part(self.part_name)
    self.__tree = parse(BytesIO(raw_part))  # type: ignore[arg-type]
    return self.__tree

custom_pretty_tree

custom_pretty_tree() -> _ElementTree | _Element

Returns a pretty-printed version of the XML tree.

This method applies custom indentation for readability.

Returns:

Type Description
_ElementTree | _Element

The pretty-printed XML tree or its root.

Source code in odfdo/xmlpart.py
238
239
240
241
242
243
244
245
246
247
248
def custom_pretty_tree(self) -> _ElementTree | _Element:
    """Applies the custom indentation helper to the tree of the part.

    The root of the parsed tree is passed to ``pretty_indent`` and the
    helper's result is returned unchanged.

    Returns:
        _ElementTree | _Element: The value returned by ``pretty_indent``.
    """
    return pretty_indent(self._get_tree().getroot())

delete_element

delete_element(child: Element) -> None

Deletes a specified child element from the XML tree.

Parameters:

Name Type Description Default
child Element

The child element to delete.

required
Source code in odfdo/xmlpart.py
166
167
168
169
170
171
172
def delete_element(self, child: Element) -> None:
    """Removes an element by calling its own ``delete`` method.

    Args:
        child: The element to delete.
    """
    remove = child.delete
    remove()

get_element

get_element(xpath_query: str) -> Element | None

Returns the first element matching the XPath query.

The XPath query is applied to the root of this XML part.

Parameters:

Name Type Description Default
xpath_query str

The XPath query string.

required

Returns:

Type Description
Element | None

The first matching Element object, or None if no match is found.

Source code in odfdo/xmlpart.py
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_element(self, xpath_query: str) -> Element | None:
    """Looks up the first element matching an XPath query.

    The query is evaluated from the root of this XML part.

    Args:
        xpath_query: The XPath query string.

    Returns:
        Element | None: The first match, or None when nothing matches.
    """
    first_match = self.root.get_element(xpath_query)
    return first_match

get_elements

get_elements(xpath_query: str) -> list[Element]

Returns a list of elements matching the XPath query.

The XPath query is applied to the root of this XML part.

Parameters:

Name Type Description Default
xpath_query str

The XPath query string.

required

Returns:

Type Description
list[Element]

A list of matching Element objects.

Source code in odfdo/xmlpart.py
139
140
141
142
143
144
145
146
147
148
149
150
def get_elements(self, xpath_query: str) -> list[Element]:
    """Collects every element matching an XPath query.

    The query is evaluated from the root of this XML part.

    Args:
        xpath_query: The XPath query string.

    Returns:
        list[Element]: Whatever the root's ``get_elements`` returns for
            the query.
    """
    matches = self.root.get_elements(xpath_query)
    return matches

pretty_serialize

pretty_serialize() -> bytes

Serializes the XML part to bytes with pretty-printing.

Returns:

Name Type Description
bytes bytes

The pretty-printed XML content as bytes, including the XML declaration.

Source code in odfdo/xmlpart.py
224
225
226
227
228
229
230
231
232
233
234
235
236
def pretty_serialize(self) -> bytes:
    """Serializes the indented form of the XML part to UTF-8 bytes.

    Returns:
        bytes: The XML declaration followed by the serialized result of
            ``custom_pretty_tree``.
    """
    declaration = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    indented = self.custom_pretty_tree()
    markup = tostring(indented, encoding="unicode")
    return declaration + markup.encode("utf8")  # type: ignore[no-any-return]

serialize

serialize(pretty: bool = False) -> bytes

Serializes the XML part to bytes.

Parameters:

Name Type Description Default
pretty bool

If True, the output XML will be pretty-printed. Defaults to False.

False

Returns:

Name Type Description
bytes bytes

The XML content as bytes, including the XML declaration.

Source code in odfdo/xmlpart.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def serialize(self, pretty: bool = False) -> bytes:
    """Serializes the XML part to UTF-8 bytes.

    Args:
        pretty: When True, the work is handed to ``pretty_serialize``.
            Defaults to False.

    Returns:
        bytes: The XML declaration followed by the serialized tree.
    """
    if pretty:
        return self.pretty_serialize()
    declaration = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    markup = tostring(self._get_tree(), encoding="unicode")
    return declaration + markup.encode("utf8")  # type: ignore[no-any-return]

xpath

xpath(xpath_query: str) -> list[Element | EText]

Applies an XPath query to the root of the XML part and its subtree.

Parameters:

Name Type Description Default
xpath_query str

The XPath query string.

required

Returns:

Type Description
list[Element | EText]

A list of Element or EText instances matching the query.

Source code in odfdo/xmlpart.py
174
175
176
177
178
179
180
181
182
183
184
def xpath(self, xpath_query: str) -> list[Element | EText]:
    """Runs an XPath query against the root of the part and its subtree.

    Args:
        xpath_query: The XPath query string.

    Returns:
        list[Element | EText]: The Element or EText results produced by
            the root's ``xpath`` method.
    """
    results = self.root.xpath(xpath_query)
    return results