Skip to content

Mixin Paragraph Formatted

Mixin class for Paragraph.get_formatted_text().

Classes:

Name Description
ParaFormattedTextMixin

Mixin class providing the get_formatted_text method for paragraph-like elements.

Attributes:

Name Type Description
RE_SP_POST
RE_SP_PRE

RE_SP_POST module-attribute

RE_SP_POST = compile('\\s*$')

RE_SP_PRE module-attribute

RE_SP_PRE = compile('^\\s*')

ParaFormattedTextMixin

Mixin class providing the get_formatted_text method for paragraph-like elements.

Methods:

Name Description
get_formatted_text

Get the formatted text content of the paragraph-like element.

Source code in odfdo/mixin_paragraph_formatted.py
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
class ParaFormattedTextMixin:
    """Mixin that equips paragraph-like elements with `get_formatted_text`."""

    def get_formatted_text(
        self,
        context: dict | None = None,
        simple: bool = False,
    ) -> str:
        """Render the content of this paragraph-like element as text.

        Args:
            context: Formatting context passed down to the rendering helper.
                When it is None or empty, a default context is built here.
            simple: When True, the content string is returned unchanged.
                When False, two newlines are appended to it.

        Returns:
            str: The formatted text content.
        """
        # Any falsy context (None or an empty dict) is replaced by defaults.
        ctx = context or {
            "document": None,
            "footnotes": [],
            "endnotes": [],
            "annotations": [],
            "rst_mode": False,
            "img_counter": 0,
            "images": [],
            "no_img_level": 0,
        }
        text = _formatted_text(self, ctx)  # type:ignore[arg-type]
        return text if simple else text + "\n\n"

get_formatted_text

get_formatted_text(
    context: dict | None = None, simple: bool = False
) -> str

Get the formatted text content of the paragraph-like element.

Parameters:

Name Type Description Default
context dict | None

A dictionary providing context for formatting.

None
simple bool

If True, returns only the content string. If False, adds two newlines at the end.

False

Returns:

Name Type Description
str str

The formatted text content.

Source code in odfdo/mixin_paragraph_formatted.py
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
def get_formatted_text(
    self,
    context: dict | None = None,
    simple: bool = False,
) -> str:
    """Return the formatted text of the paragraph-like element.

    Args:
        context: Formatting context handed to the rendering helper. A
            missing or empty value is replaced by a default context.
        simple: True returns just the content string; False returns the
            content followed by two newlines.

    Returns:
        str: The formatted text content.
    """
    if not context:
        # Build the default context used when the caller supplies none.
        context = {
            "document": None,
            "footnotes": [],
            "endnotes": [],
            "annotations": [],
            "rst_mode": False,
            "img_counter": 0,
            "images": [],
            "no_img_level": 0,
        }
    rendered = _formatted_text(self, context)  # type:ignore[arg-type]
    suffix = "" if simple else "\n\n"
    return rendered + suffix

_add_object_text

_add_object_text(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Dispatch function to add formatted text for various ODF elements.

This function determines the specific handler for an element based on its tag and delegates to the appropriate helper function to add its formatted text to the result list.

Parameters:

Name Type Description Default
obj Element

The element to process.

required
context dict[str, Any]

The formatting context.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def _add_object_text(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Route an element to the helper that renders its tag.

    The element's tag selects one of the `_add_object_text_*` helpers, which
    appends the formatted text to `result`. Elements with any other tag are
    rendered through their own `get_formatted_text` method.

    Args:
        obj: The element to process.
        context: The formatting context.
        result: The list to append the formatted text to.
    """
    handlers = {
        # Simple tags with text
        "text:a": _add_object_text_paragraph,
        "text:p": _add_object_text_paragraph,
        # Spans may get some styles converted in rst_mode
        "text:span": _add_object_text_span,
        "text:note": _add_object_text_note,
        "office:annotation": _add_object_text_annotation,
        "text:tab": _add_object_text_tab,
        "text:line-break": _add_object_text_line_break,
    }
    handler = handlers.get(obj.tag)
    if handler is None:
        # No dedicated helper for this tag: the element formats itself.
        result.append(obj.get_formatted_text(context))
        return None
    return handler(obj, context, result)

_add_object_text_annotation

_add_object_text_annotation(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add formatted text for an annotation to the result list.

Appends the annotation’s body to the annotations list in the context and adds an appropriate marker to the result list.

Parameters:

Name Type Description Default
obj Element

The annotation element to process.

required
context dict[str, Any]

The formatting context, including ‘annotations’ and ‘rst_mode’.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def _add_object_text_annotation(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add formatted text for an annotation to the result list.

    Appends the annotation's body to the `annotations` list in the context
    and adds an appropriate marker to the result list.

    Args:
        obj: The annotation element to process.
        context: The formatting context, including 'annotations' and 'rst_mode'.
        result: The list to append the formatted text to.
    """
    context["annotations"].append(obj.note_body)  # type:ignore
    if context.get("rst_mode"):
        result.append(" [#]_ ")
    else:
        result.append("[*]")

_add_object_text_line_break

_add_object_text_line_break(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add a line break to the result list for a line break element.

Adds a standard newline or an RST-specific line break marker (`\n|`) depending on the `rst_mode` in the context.

Args:
    obj: The line break element to process.
    context: The formatting context, including 'rst_mode'.
    result: The list to append the line break to.
Source code in odfdo/mixin_paragraph_formatted.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def _add_object_text_line_break(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add a line break to the result list for a line break element.

    Adds a standard newline or an RST-specific line break marker (`\n|`)
    depending on the `rst_mode` in the context.

    Args:
        obj: The line break element to process.
        context: The formatting context, including 'rst_mode'.
        result: The list to append the line break to.
    """
    if context.get("rst_mode"):
        result.append("\n|")
    else:
        result.append("\n")

_add_object_text_note

_add_object_text_note(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Dispatch function to add formatted text for a note (footnote or endnote).

Delegates to _add_object_text_note_foot or _add_object_text_note_end based on the note’s class.

Parameters:

Name Type Description Default
obj Element

The note element to process.

required
context dict[str, Any]

The formatting context.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def _add_object_text_note(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Route a note element to the footnote or endnote helper.

    A note whose `note_class` equals "footnote" goes to
    `_add_object_text_note_foot`; every other note goes to
    `_add_object_text_note_end`.

    Args:
        obj: The note element to process.
        context: The formatting context.
        result: The list to append the formatted text to.
    """
    is_footnote = obj.note_class == "footnote"  # type:ignore
    handler = _add_object_text_note_foot if is_footnote else _add_object_text_note_end
    return handler(obj, context, result)

_add_object_text_note_end

_add_object_text_note_end(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add formatted text for an endnote to the result list.

Formats the endnote citation and appends the note’s body to the endnotes list in the context.

Parameters:

Name Type Description Default
obj Element

The endnote element to process.

required
context dict[str, Any]

The formatting context, including ‘endnotes’ and ‘rst_mode’.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def _add_object_text_note_end(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add formatted text for an endnote to the result list.

    Formats the endnote citation and appends the note's body to the
    `endnotes` list in the context.

    Args:
        obj: The endnote element to process.
        context: The formatting context, including 'endnotes' and 'rst_mode'.
        result: The list to append the formatted text to.
    """
    container = context["endnotes"]
    citation = obj.citation  # type:ignore
    if not citation:
        # Would only happen with hand-made documents
        citation = len(container)
    body = obj.note_body  # type:ignore
    container.append((citation, body))
    if context.get("rst_mode"):
        marker = " [*]_ "
    else:
        marker = f"({citation})"
    result.append(marker)

_add_object_text_note_foot

_add_object_text_note_foot(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add formatted text for a footnote to the result list.

Formats the footnote citation and appends the note’s body to the footnotes list in the context.

Parameters:

Name Type Description Default
obj Element

The footnote element to process.

required
context dict[str, Any]

The formatting context, including ‘footnotes’ and ‘rst_mode’.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def _add_object_text_note_foot(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add formatted text for a footnote to the result list.

    Formats the footnote citation and appends the note's body to the
    `footnotes` list in the context.

    Args:
        obj: The footnote element to process.
        context: The formatting context, including 'footnotes' and 'rst_mode'.
        result: The list to append the formatted text to.
    """
    container = context["footnotes"]
    citation = obj.citation  # type:ignore
    if not citation:
        # Would only happen with hand-made documents
        citation = len(container)
    body = obj.note_body  # type:ignore
    container.append((citation, body))
    if context.get("rst_mode"):
        marker = " [#]_ "
    else:
        marker = f"[{citation}]"
    result.append(marker)

_add_object_text_paragraph

_add_object_text_paragraph(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add formatted text from a paragraph-like object to the result list.

Parameters:

Name Type Description Default
obj Element

The paragraph-like element to process.

required
context dict[str, Any]

The formatting context.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
33
34
35
36
37
38
39
40
41
42
43
44
45
def _add_object_text_paragraph(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Render a paragraph-like element and append its text to `result`.

    Args:
        obj: The paragraph-like element to process.
        context: The formatting context.
        result: The list to append the formatted text to.
    """
    text = _formatted_text(obj, context)
    result.append(text)

_add_object_text_span

_add_object_text_span(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add formatted text from a span element to the result list.

Applies bold or italic styling if rst_mode is enabled in the context and the span’s style properties indicate such formatting.

Parameters:

Name Type Description Default
obj Element

The span element to process.

required
context dict[str, Any]

The formatting context, including ‘rst_mode’ and ‘document’.

required
result list[str]

The list to append the formatted text to.

required
Source code in odfdo/mixin_paragraph_formatted.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def _add_object_text_span(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add formatted text from a span element to the result list.

    Outside `rst_mode`, or when the span holds only whitespace, the text is
    appended unchanged. In `rst_mode`, the span's style is looked up in the
    context's document; a bold font weight takes precedence over an italic
    font style. If the span has no style name, no document is available, or
    the style cannot be resolved, the text is appended unstyled.

    Args:
        obj: The span element to process.
        context: The formatting context, including 'rst_mode' and 'document'.
        result: The list to append the formatted text to.
    """
    text = _formatted_text(obj, context)
    if not context.get("rst_mode") or not text.strip():
        result.append(text)
        return
    style_name = getattr(obj, "style", None)
    document = context.get("document")
    if not style_name or not document:
        result.append(text)
        return
    style = document.get_style("text", style_name)
    # The style lookup can come back empty (e.g. a span naming a style the
    # document does not define); treat that as "no properties" rather than
    # failing on a None style.
    properties = style.get_properties() if style is not None else None
    if properties:
        if properties.get("fo:font-weight") == "bold":
            result.append(_bold_styled(text))
            return
        if properties.get("fo:font-style") == "italic":
            result.append(_italic_styled(text))
            return
    result.append(text)

_add_object_text_tab

_add_object_text_tab(
    obj: Element, context: dict[str, Any], result: list[str]
) -> None

Add a tab character for a tab element to the result list.

Parameters:

Name Type Description Default
obj Element

The tab element to process.

required
context dict[str, Any]

The formatting context (unused in this function).

required
result list[str]

The list to append the tab character to.

required
Source code in odfdo/mixin_paragraph_formatted.py
239
240
241
242
243
244
245
246
247
248
249
250
251
def _add_object_text_tab(
    obj: Element,
    context: dict[str, Any],
    result: list[str],
) -> None:
    """Add a tab character for a tab element to the result list.

    Args:
        obj: The tab element to process.
        context: The formatting context (unused in this function).
        result: The list to append the tab character to.
    """
    result.append("\t")

_bold_styled

_bold_styled(text: str) -> str

Format a string as bold in Markdown, preserving leading/trailing spaces.

Source code in odfdo/mixin_paragraph_formatted.py
62
63
64
def _bold_styled(text: str) -> str:
    """Wrap the stripped text in `**` markers, keeping outer whitespace outside."""
    leading = _pre(text)
    core = text.strip()
    trailing = _post(text)
    return f"{leading}**{core}**{trailing}"

_formatted_text

_formatted_text(
    element: Element, context: dict[str, Any]
) -> str

Recursively extract and format text from an element and its children.

This function processes the element’s direct children and text nodes, applying specific formatting rules based on their type and the provided context.

Parameters:

Name Type Description Default
element Element

The element from which to extract formatted text.

required
context dict[str, Any]

A dictionary containing formatting context (e.g., rst_mode, document reference).

required

Returns:

Name Type Description
str str

The extracted and formatted text content.

Source code in odfdo/mixin_paragraph_formatted.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def _formatted_text(element: Element, context: dict[str, Any]) -> str:
    """Build the formatted text of an element from its direct children.

    The element's child elements and text nodes are visited in the order
    returned by the `*|text()` XPath query. Text nodes are kept as they are;
    child elements are rendered through `_add_object_text`.

    Args:
        element: The element from which to extract formatted text.
        context: A dictionary containing formatting context
            (e.g., `rst_mode`, document reference).

    Returns:
        str: The concatenation of all collected pieces.
    """
    pieces: list[str] = []
    for node in element.xpath("*|text()"):
        if not isinstance(node, EText):
            # Child element: the dispatcher appends its rendering to `pieces`.
            _add_object_text(node, context, pieces)
        else:
            pieces.append(node)
    return "".join(pieces)

_italic_styled

_italic_styled(text: str) -> str

Format a string as italic in Markdown, preserving leading/trailing spaces.

Source code in odfdo/mixin_paragraph_formatted.py
67
68
69
def _italic_styled(text: str) -> str:
    """Wrap the stripped text in `*` markers, keeping outer whitespace outside."""
    leading = _pre(text)
    core = text.strip()
    trailing = _post(text)
    return f"{leading}*{core}*{trailing}"

_post

_post(text: str) -> str

Extract trailing whitespace from a string.

Source code in odfdo/mixin_paragraph_formatted.py
55
56
57
58
59
def _post(text: str) -> str:
    """Return the whitespace found at the end of `text` by `RE_SP_POST`."""
    match = RE_SP_POST.search(text)
    if match is None:
        return ""
    return match.group()

_pre

_pre(text: str) -> str

Extract leading whitespace from a string.

Source code in odfdo/mixin_paragraph_formatted.py
48
49
50
51
52
def _pre(text: str) -> str:
    """Return the whitespace found at the start of `text` by `RE_SP_PRE`."""
    match = RE_SP_PRE.match(text)
    if match is None:
        return ""
    return match.group()