Recipes
Recipes source code is in the /recipes
directory of odfdo
sources.
Most recipes are autonomous scripts that perform actual modifications of ODF sample files; you can check the results in the recipes/recipes_output
directory.
How to write hello world in a text document
Create a minimal text document with “Hello World” in a paragraph.
recipes/how_to_write_hello_world_in_a_text_document.py
#!/usr/bin/env python
"""Create a minimal text document with "Hello World" in a paragraph."""
import os
from pathlib import Path
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 3
# Directory and file name used when saving the generated document.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_hello"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write ``document`` to ``OUTPUT_DIR / name``.

    The output directory is created on demand and the destination path is
    printed before the document is saved with ``pretty=True``.
    """
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document() -> Document:
    """Return a text document whose body holds one "Hello World" paragraph."""
    hello_doc = Document("text")
    content = hello_doc.body
    # Empty the body first so the paragraph below is its only child.
    content.clear()
    content.append(Paragraph("Hello World"))
    return hello_doc
def main() -> None:
    """Generate the document, run the optional self-check, then save it."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Check the body text; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        assert str(document.body) == "Hello World\n"
if __name__ == "__main__":
main()
How to write hello world in a spreadsheet document
Create a minimal spreadsheet with “Hello World” in the first cell.
recipes/how_to_write_hello_world_in_a_spreadsheet_document.py
#!/usr/bin/env python
"""Create a minimal spreadsheet with "Hello World" in the first cell."""
import os
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 5
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "document.ods"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside ``OUTPUT_DIR``.

    Creates the directory if needed and prints the target path first.
    """
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def generate_document() -> Document:
    """Return a spreadsheet with one table whose cell A1 is "Hello World"."""
    hello_table = Table("Hello table")
    hello_table.set_value("A1", "Hello World")

    sheet_doc = Document("spreadsheet")
    content = sheet_doc.body
    # Empty the body so the table is its only child.
    content.clear()
    content.append(hello_table)
    return sheet_doc
def main() -> None:
    """Build the spreadsheet, self-check it when testing, and save it."""
    spreadsheet = generate_document()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
def test_unit(document: Document) -> None:
    """Check cell A1 of the first table; skipped unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    first_table = document.body.get_table(0)
    first_cell = first_table.get_cell((0, 0))
    value = first_cell.value.strip()
    print(value)
    assert value == "Hello World"
if __name__ == "__main__":
main()
Basic presentation hello world
Write a basic “Hello World” in the middle of the first page of a presentation.
recipes/basic_presentation_hello_world.py
#!/usr/bin/env python
"""Write a basic "Hello World" in the middle of the first page
of a presentation.
"""
import os
from pathlib import Path
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 7
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_odp"
TARGET = "hello.odp"
def save_new(document: Document, name: str) -> None:
    """Store ``document`` under ``OUTPUT_DIR`` with the file name ``name``.

    The directory is created when missing; the path is printed before saving.
    """
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def add_text_frame(document: Document, text: str) -> None:
    """Replace the presentation content with one page holding a text frame.

    Args:
        document: the presentation to modify in place.
        text: the text displayed in the frame.
    """
    frame_options = {
        "size": ("7cm", "5cm"),
        "position": ("11cm", "8cm"),
        "style": "Standard",
        "text_style": "Standard",
    }
    content = document.body
    content.clear()
    slide = DrawPage("page1", name="Page 1")
    slide.append(Frame.text_frame(text, **frame_options))
    content.append(slide)
def main() -> None:
    """Create a presentation with a "Hello world!" frame and save it."""
    presentation = Document("presentation")
    add_text_frame(presentation, "Hello world!")
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Check that exactly one frame exists and holds the expected text."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        found = document.body.get_frames()
        assert len(found) == 1
        assert str(found[0]).strip() == "Hello world!"
if __name__ == "__main__":
main()
Create a basic text document
Create a basic text document with headers and paragraphs.
recipes/create_a_basic_text_document.py
#!/usr/bin/env python
"""Create a basic text document with headers and paragraphs."""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 10
# Directory and file name used when saving the generated document.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_text"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write ``document`` to ``OUTPUT_DIR / name``, creating the directory."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_basic_document() -> Document:
    """Return a text document made of one title, two sections and their text."""
    # Elements are listed in reading order and appended to the body as-is.
    elements = (
        Header(1, "De la Guerre des Gaules - Livre V"),
        Header(2, "Préparatifs d'expédition en Bretagne"),
        Paragraph(
            "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
            "César, quittant les quartiers d'hiver pour aller en Italie, "
            "comme il avait coutume de le faire chaque année, ordonne aux "
            "lieutenants qu'il laissait à la tête des légions de construire, "
            "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
            "et de réparer les anciens."
        ),
        Header(2, "La Bretagne"),
        Paragraph(
            "Cette île est de forme triangulaire ; l'un des côtés regarde "
            "la Gaule. Des deux angles de ce côté, l'un est au levant, "
            "vers le pays de Cantium, où abordent presque tous les vaisseaux "
            "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
            "est d'environ cinq cent mille pas. "
        ),
    )
    text_doc = Document("text")
    content = text_doc.body
    content.clear()
    for element in elements:
        content.append(element)
    return text_doc
def main() -> None:
    """Build the basic document, self-check it when testing, and save it."""
    basic_doc = create_basic_document()
    test_unit(basic_doc)
    save_new(basic_doc, TARGET)
def test_unit(document: Document) -> None:
    """Check how the paragraph at position 1 starts; test-suite only."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    second_paragraph = document.body.get_paragraph(position=1)
    assert str(second_paragraph).startswith("Cette île est de forme triangulaire")
if __name__ == "__main__":
main()
How to add a paragraph to a text document
Minimal example of how to add a paragraph.
recipes/how_to_add_a_paragraph_to_a_text_document.py
"""Minimal example of how to add a paragraph."""
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 12
def main() -> None:
    """Append a "Hello World" paragraph to the body of a new text document."""
    text_doc = Document("text")
    content = text_doc.body
    # create a new paragraph with some content and attach it to the body:
    content.append(Paragraph("Hello World"))
if __name__ == "__main__":
main()
Create a basic text document with a list
Create a basic text document with a list.
recipes/create_a_basic_text_document_with_a_list.py
#!/usr/bin/env python
"""Create a basic text document with a list."""
import os
from pathlib import Path
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 20
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_list"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside ``OUTPUT_DIR`` (created if needed)."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def generate_document() -> Document:
    """Return a text document holding a four-item list.

    The formatted text of the document is printed as a side effect.
    """
    list_doc = Document("text")
    content = list_doc.body

    # The list accepts a Python list of strings and list items.
    names = List(["Arthur", "Ford", "Trillian"])
    # The list can be attached to the body before it is complete:
    content.append(names)

    # One more item, added after the list is already in the document:
    names.append_item(ListItem("Marvin"))

    # The printed text should contain:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    print(list_doc.get_formatted_text())
    return list_doc
def main() -> None:
    """Generate the list document, self-check it when testing, and save it."""
    list_doc = generate_document()
    test_unit(list_doc)
    save_new(list_doc, TARGET)
def test_unit(document: Document) -> None:
    """Compare the document text with the expected list; test-suite only."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        rendered = str(document).strip()
        assert rendered == "- Arthur\n- Ford\n- Trillian\n- Marvin"
if __name__ == "__main__":
main()
Create a basic text document with list and sublists
Create a short text document containing a list of items and a few sublists. The code demonstrates several manipulations of the list and its items, then displays the result to standard output.
recipes/create_a_basic_text_document_with_list_and_sublists.py
#!/usr/bin/env python
"""Create a short text document containing a list of items and a few sublists.
The code demonstrates several manipulations of the list and its items, then
displays the result to standard output."""
import os
from pathlib import Path
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 25
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_sublist"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Store ``document`` under ``OUTPUT_DIR`` with the file name ``name``."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def generate_document() -> Document:
    """Return a document with a list, a sublist and several inserted items.

    The formatted text is printed twice: once after the sublist is added and
    once after the insertions.
    """
    sample_doc = Document("text")
    content = sample_doc.body
    content.clear()

    # Start from a plain list, then append one more item to it.
    crew = List(["Arthur", "Ford", "Trillian"])
    android = ListItem("Marvin")
    crew.append_item(android)

    # Attach the list to the document.
    content.append(crew)

    # A sublist is just a List appended to an item of another list.
    android.append(List(["Paranoid Android", "older than the universe"]))

    # First printout:
    # - Arthur
    # - Ford
    # - Trillian
    # - Marvin
    #
    #   - Paranoid Android
    #   - older than the universe
    print(sample_doc.get_formatted_text())

    # In case you forgot an item, insert it by position...
    crew.insert_item("some dolphins", position=1)

    # ...or relative to an existing item, before or after it.
    anchor = crew.get_item(content="Marvin")
    crew.insert_item("Zaphod", before=anchor)
    crew.insert_item("and many others", after=anchor)

    # Second printout:
    # - Arthur
    # - some dolphins
    # - Ford
    # - Trillian
    # - Zaphod
    # - Marvin
    #
    #   - Paranoid Android
    #   - older than the universe
    # - and many others
    #
    print(sample_doc.get_formatted_text())
    return sample_doc
def main() -> None:
    """Generate the document with sublists, self-check it, and save it."""
    nested_doc = generate_document()
    test_unit(nested_doc)
    save_new(nested_doc, TARGET)
def test_unit(document: Document) -> None:
    """Compare the formatted text with the expected lists; test-suite only."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    expected = (
        "- Arthur\n"
        "- some dolphins\n"
        "- Ford\n"
        "- Trillian\n"
        "- Zaphod\n"
        "- Marvin\n"
        " \n"
        " - Paranoid Android\n"
        " - older than the universe\n"
        "- and many others"
    )
    rendered = document.get_formatted_text().strip()
    assert rendered == expected
if __name__ == "__main__":
main()
How to add a sublist to a list
Minimal example of how to add a sublist to a list.
recipes/how_to_add_a_sublist_to_a_list.py
#!/usr/bin/env python
"""Minimal example of how to add a sublist to a list."""
import os
from pathlib import Path
from odfdo import Document, List, ListItem
_DOC_SEQUENCE = 27
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "minimal_list"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write ``document`` to ``OUTPUT_DIR / name``, creating the directory."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document() -> Document:
    """Return a text document whose list carries a nested sublist.

    The serialized body is printed as a side effect.
    """
    drinks_doc = Document("text")
    content = drinks_doc.body

    drinks = List(["chocolat", "café"])
    content.append(drinks)

    tea = ListItem("thé")
    drinks.append(tea)
    # Nesting is done by appending a List to an item of the outer list.
    tea.append(List(["thé vert", "thé rouge"]))

    print(content.serialize(True))
    return drinks_doc
def main() -> None:
    """Generate the document, self-check it when testing, and save it."""
    drinks_doc = generate_document()
    test_unit(drinks_doc)
    save_new(drinks_doc, TARGET)
def test_unit(document: Document) -> None:
    """Check that the body reports two lists (outer and nested); test only."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        all_lists = document.body.get_lists()
        assert len(all_lists) == 2
if __name__ == "__main__":
main()
How to add an item to a list
Minimal example of how to add an item to a list.
recipes/how_to_add_an_item_to_a_list.py
#!/usr/bin/env python
"""Minimal example of how to add an item to a list."""
import os
from odfdo import List, ListItem
_DOC_SEQUENCE = 28
def generate_list() -> List:
    """Return a two-item List with a third item appended afterwards."""
    drinks = List(["chocolate", "coffee"])
    drinks.append(ListItem("tea"))
    return drinks
def main() -> None:
    """Build the sample list and run the optional self-check on it."""
    drinks = generate_list()
    test_unit(drinks)
def test_unit(some_list: List) -> None:
    """Compare the list text with the expected three items; test-suite only."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        rendered = str(some_list).strip()
        assert rendered == "- chocolate\n - coffee\n - tea"
if __name__ == "__main__":
main()
How to insert a new item within a list
Minimal example of how to insert a new item within a list.
recipes/how_to_insert_a_new_item_within_a_list.py
#!/usr/bin/env python
"""Minimal example of how to insert a new item within a list."""
import os
from odfdo import List
_DOC_SEQUENCE = 29
def generate_list() -> List:
    """Return a List built with positional and relative insertions.

    The resulting list is printed as a side effect.
    """
    drinks = List(["chocolate", "coffee"])

    # By index: "tea" becomes the second item.
    drinks.insert_item("tea", position=1)

    # Relative to an existing item: one before it, one after it.
    anchor = drinks.get_item(content="coffee")
    drinks.insert_item("green tea", before=anchor)
    drinks.insert_item("black tea", after=anchor)

    print(str(drinks))
    return drinks
def main() -> None:
    """Build the list with inserted items and run the optional self-check."""
    drinks = generate_list()
    test_unit(drinks)
def test_unit(some_list: List) -> None:
    """Compare the list text with the expected item order; test-suite only."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    expected = "- chocolate\n - tea\n - green tea\n - coffee\n - black tea"
    assert str(some_list).strip() == expected
if __name__ == "__main__":
main()
Get text content from odt file
Read the text content from an .odt file.
recipes/get_text_content_from_odt_file.py
#!/usr/bin/env python
"""Read the text content from an .odt file."""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 30
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the bundled sample."""
    cli_args = sys.argv[1:]
    # Fall back to DATA / SOURCE when no path argument was given.
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)
def read_text_content(document: Document) -> str:
    """Return the formatted text of ``document``.

    Also prints the document type, the text length and the first 240
    characters of the text.
    """
    # Report what kind of document was opened.
    print("Type of document:", document.get_type())

    # get_formatted_text() is a quick way to extract the whole text.
    content = document.get_formatted_text()
    print("Size of text:", len(content))

    # Show only the beginning of the text.
    print("Start of the text:")
    print(content[:240])
    return content
def main() -> None:
    """Read the source document, extract its text and self-check the result."""
    source_doc = read_source_document()
    extracted = read_text_content(source_doc)
    test_unit(extracted)
def test_unit(text: str) -> None:
    """Check the extracted text length; does nothing outside the test suite."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        assert len(text) == 56828
if __name__ == "__main__":
main()
Create a basic text document with a table of content
Create a basic document containing some paragraphs and headers, add a Table of Content from its headers.
recipes/create_a_basic_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Create a basic document containing some paragraphs and headers, add a
Table of Content from its headers.
"""
import os
from pathlib import Path
from odfdo import TOC, Document, Header, Paragraph
_DOC_SEQUENCE = 35
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_toc"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside ``OUTPUT_DIR`` (created if needed)."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def make_document() -> Document:
    """Return a document with one level-1 title and three titled sections.

    Titles are 70-character slices of LOREM; each section body is the full
    LOREM text.
    """
    lorem_doc = Document("text")
    content = lorem_doc.body
    content.clear()

    content.append(Header(1, LOREM[:70]))

    # Shift the 70-character title window by 5 characters per section.
    for offset in range(0, 15, 5):
        content.append(Header(2, LOREM[offset : offset + 70]))
        content.append(Paragraph(LOREM))
    return lorem_doc
def add_toc(document: Document) -> None:
    """Insert a filled Table of Content at the top of the document."""
    contents_table = TOC()
    # Replace the default title of the table of content:
    contents_table.title = "My Table of Content"

    # Appending the TOC would make it appear at the end of the document:
    #     document.body.append(toc)
    # so it is inserted at the first position instead.
    document.body.insert(contents_table, position=0)

    # Beware: fill the TOC from the actual content now. Otherwise the reader
    # will need to "update the table of content" later.
    contents_table.fill()
def main() -> None:
    """Build the document, add its table of content, self-check and save."""
    lorem_doc = make_document()
    add_toc(lorem_doc)
    test_unit(lorem_doc)
    save_new(lorem_doc, TARGET)
def test_unit(document: Document) -> None:
    """Check one TOC line and the TOC position in the body; test-suite only."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    content = document.body
    contents_table = content.get_toc()
    toc_lines = str(contents_table).split("\n")
    assert toc_lines[2] == (
        "1.1. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed non risu"
    )
    # The TOC must be the first child of the body.
    assert document.body.index(contents_table) == 0
if __name__ == "__main__":
main()
Update a text document with a table of content
Update the table of contents of a document.
recipes/update_a_text_document_with_a_table_of_content.py
#!/usr/bin/env python
"""Update the table of contents of a document."""
import os
import sys
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 38
DATA = Path(__file__).parent / "data"
SOURCE = "doc_with_toc.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_toc"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the bundled sample."""
    cli_args = sys.argv[1:]
    # Fall back to DATA / SOURCE when no path argument was given.
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Store ``document`` under ``OUTPUT_DIR`` with the file name ``name``."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def update_toc(document: Document) -> None:
    """Apply a series of TOC changes, checking the TOC after each one."""
    # Each modification is followed by the check of the state it produces.
    steps = (
        check_toc_v1,
        add_some_header,
        check_toc_v2,
        change_toc_title,
        check_toc_v3,
        change_toc_title_to_empty,
        check_toc_v4,
        remove_second_header_1b,
        check_toc_v5,
        add_toc_title,
        check_toc_v6,
    )
    for step in steps:
        step(document)
def check_toc_v1(document: Document) -> None:
    """Assert the initial TOC: title line plus four entries."""
    expected_starts = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
    )
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def add_some_header(document: Document) -> None:
    """Append a level-1 header and a paragraph, then refresh the TOC."""
    content = document.body
    content.append(Header(1, "New header"))
    content.append(Paragraph("Some text after the new header."))
    # update the table of contents
    document.body.toc.fill(document)
def check_toc_v2(document: Document) -> None:
    """Assert the TOC after the new header was added: six lines."""
    expected_starts = (
        "Table of Contents",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def change_toc_title(document: Document) -> None:
    """Set the TOC title to "Another title" and refresh the TOC."""
    contents_table = document.body.toc
    contents_table.set_toc_title("Another title")
    contents_table.fill(document)
def check_toc_v3(document: Document) -> None:
    """Assert the TOC still has six lines and starts with the new title."""
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 6
    title_line = lines[0]
    assert title_line.startswith("Another title")
def change_toc_title_to_empty(document: Document) -> None:
    """Remove the TOC title by setting it to "" and refresh the TOC."""
    contents_table = document.body.toc
    # An empty title removes the title line from the TOC.
    contents_table.set_toc_title("")
    contents_table.fill(document)
def check_toc_v4(document: Document) -> None:
    """Assert the TOC without a title line: five entries."""
    expected_starts = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1B",
        "1.3. Lorem 1C",
        "2. New header",
    )
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def remove_second_header_1b(document: Document) -> None:
    """Delete the header at position 2 and refresh the TOC."""
    # The TOC checks before and after this step show that the header at
    # position 2 is "Lorem 1B", the second level-2 header.
    target = document.body.get_header(position=2)
    # The element is attached to the document, so deleting it removes it
    # from the document as well.
    target.delete()
    document.body.toc.fill(document)
def check_toc_v5(document: Document) -> None:
    """Assert the TOC after "Lorem 1B" was removed: four entries."""
    expected_starts = (
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 4
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def add_toc_title(document: Document) -> None:
    """Set the TOC title to "A new title" and refresh the TOC."""
    contents_table = document.body.toc
    contents_table.set_toc_title("A new title")
    contents_table.fill(document)
def check_toc_v6(document: Document) -> None:
    """Assert the final TOC: restored title line plus four entries."""
    expected_starts = (
        "A new title",
        "1. Lorem 1",
        "1.1. Lorem 1A",
        "1.2. Lorem 1C",
        "2. New header",
    )
    lines = str(document.body.toc).split("\n")
    assert len(lines) == 5
    for line, start in zip(lines, expected_starts):
        assert line.startswith(start)
def main() -> None:
    """Read the source document, update its TOC, self-check and save it."""
    source_doc = read_source_document()
    update_toc(source_doc)
    test_unit(source_doc)
    save_new(source_doc, TARGET)
def test_unit(document: Document) -> None:
    """Re-run the final TOC check; does nothing outside the test suite."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        check_toc_v6(document)
if __name__ == "__main__":
main()
Create a basic text document with annotations
Create a basic document containing some paragraphs and headers, add some annotations. Annotations are notes that don’t appear in the document but typically on a side bar in a desktop application. So they are not printed.
recipes/create_a_basic_text_document_with_annotations.py
#!/usr/bin/env python
"""Create a basic document containing some paragraphs and headers, add some
annotations. Annotations are notes that don't appear in the document but
typically on a side bar in a desktop application. So they are not printed.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 40
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_annotations"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write ``document`` to ``OUTPUT_DIR / name``, creating the directory."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def make_document() -> Document:
    """Return a document with a main title and three short titled sections."""
    # Every section reuses the same 240-character excerpt of LOREM.
    excerpt = LOREM[:240]

    sample_doc = Document("text")
    content = sample_doc.body
    content.clear()
    content.append(Header(1, "Main title"))
    for idx in range(3):
        content.append(Header(2, f"title {idx}"))
        content.append(Paragraph(excerpt))
    return sample_doc
def make_annotations(document: Document) -> None:
    """Add one annotation to each paragraph of the document."""
    for rank, paragraph in enumerate(document.body.paragraphs, start=1):
        # The anchor word moves three words further with each paragraph
        # (word index 3, 6, 9, ...).
        some_word = str(paragraph).split()[3 * rank]
        paragraph.insert_annotation(
            # The annotation is inserted after this word:
            after=some_word,
            # The text of the annotation:
            body=f"It's so easy ! (after {some_word!r})",
            # The author of the annotation:
            creator="Bob",
            # A "date" datetime argument may also be given; per the original
            # recipe comment it defaults to datetime.now().
        )
def main() -> None:
    """Build the document, annotate it, self-check it and save it."""
    document = make_document()
    make_annotations(document)
    test_unit(document)
    save_new(document, TARGET)
def test_unit(document: Document) -> None:
    """Check that three annotations by "Bob" exist; test-suite only."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        by_bob = document.body.get_annotations(creator="Bob")
        assert len(by_bob) == 3
if __name__ == "__main__":
main()
Create a basic text document with footnotes
Create a basic document containing some paragraphs and headers, add some footnotes. Footnotes are displayed at the end of the pages of the document.
recipes/create_a_basic_text_document_with_footnotes.py
#!/usr/bin/env python
"""Create a basic document containing some paragraphs and headers, add some
footnotes. Footnotes are displayed at the end of the pages of the document.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 45
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_footnotes"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save ``document`` as ``name`` inside ``OUTPUT_DIR`` (created if needed)."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def make_document() -> Document:
    """Return a document with a main title and three short titled sections."""
    # Every section reuses the same 240-character excerpt of LOREM.
    excerpt = LOREM[:240]

    sample_doc = Document("text")
    content = sample_doc.body
    content.clear()
    content.append(Header(1, "Main title"))
    for idx in range(3):
        content.append(Header(2, f"title {idx}"))
        content.append(Paragraph(excerpt))
    return sample_doc
def make_footnotes(document: Document) -> None:
    """Add one footnote to each paragraph of the document."""
    reference = 'John Doe, A. (2007). "How to cite references", Sample Editions.'
    for rank, paragraph in enumerate(document.body.paragraphs, start=1):
        # The anchor word moves three words further with each paragraph
        # (word index 3, 6, 9, ...).
        some_word = str(paragraph).split()[3 * rank]
        # Note identifiers go up in steps of ten: note10, note20, ...
        note_counter = 10 * rank
        # Notes are quite complex so they have a dedicated API on paragraphs:
        paragraph.insert_note(
            # The word after which the citation mark is inserted:
            after=some_word,
            # A unique identifier of the note in the document:
            note_id=f"note{note_counter}",
            # The symbol the user sees to follow the footnote:
            citation="1",
            # The footnote content itself:
            body=reference,
        )
def main() -> None:
    """Build the document, add footnotes, self-check it and save it."""
    sample_doc = make_document()
    make_footnotes(sample_doc)
    test_unit(sample_doc)
    save_new(sample_doc, TARGET)
def test_unit(document: Document) -> None:
    """Check that the body holds three notes; test-suite only."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    all_notes = document.body.get_notes()
    assert len(all_notes) == 3
if __name__ == "__main__":
main()
How to add footnote to a text document
Minimal example of how to add a footnote to a text document.
Notes are quite complex so they deserve a dedicated API on paragraphs:
paragraph.insert_note()
The arguments are:
after => The word after which the “¹” citation is inserted; note_id => A unique identifier of the note in the document; citation => The symbol the user sees to follow the footnote; body => The footnote itself, at the end of the page.
odfdo creates footnotes by default. To create endnotes (notes that appear at the end of the document), add the parameter: note_class='endnote'.
recipes/how_to_add_footnote_to_a_text_document.py
#!/usr/bin/env python
"""Minimal example of how to add a footnote to a text document.

Notes are quite complex so they deserve a dedicated API on paragraphs:

paragraph.insert_note()

The arguments are:

after    => The word after which the “¹” citation is inserted.
note_id  => A unique identifier of the note in the document.
citation => The symbol the user sees to follow the footnote.
body     => The footnote itself, at the end of the page.

odfdo creates footnotes by default. To create endnotes (notes
that appear at the end of the document), add the parameter:
note_class='endnote'.
"""
import os
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 47
def generate_document() -> Document:
    """Return a text document whose single paragraph carries one footnote."""
    note_options = {
        "after": "graph",
        "note_id": "note1",
        "citation": "1",
        "body": 'John Doe (2007). "How to cite references" New York: Books.',
    }
    noted_doc = Document("text")
    content = noted_doc.body
    content.clear()
    sentence = Paragraph("A paragraph with a footnote about some references.")
    content.append(sentence)
    # The note is inserted once the paragraph is attached to the body.
    sentence.insert_note(**note_options)
    return noted_doc
def main() -> None:
    """Build the document with a footnote and run the optional self-check."""
    noted_doc = generate_document()
    test_unit(noted_doc)
def test_unit(document: Document) -> None:
    """Check that exactly one note with id "note1" exists; test-suite only."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        found = document.body.get_notes()
        assert len(found) == 1
        assert found[0].note_id == "note1"
if __name__ == "__main__":
main()
Create a text document with tables in it
Build a commercial document, with numerical values displayed in both the text and in a table.
recipes/create_a_text_document_with_tables_in_it.py
#!/usr/bin/env python
"""Build a commercial document, with numerical values displayed in
both the text and in a table.
"""
import os
from pathlib import Path
from odfdo import (
Cell,
Document,
Header,
List,
ListItem,
Paragraph,
Row,
Table,
create_table_cell_style,
make_table_cell_border_string,
)
_DOC_SEQUENCE = 50
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "commercial"
TARGET = "commercial.odt"
TAX_RATE = 0.20
class Product:
    """A catalog entry: numeric reference, display name and unit price."""

    def __init__(self, reference: int, name: str, price: float) -> None:
        """Store the fields; the display name is ``name`` prefixed by "Product "."""
        display_name = f"Product {name}"
        self.reference, self.name, self.price = reference, display_name, price
class OrderLine:
    """One line of an order: a product reference and the ordered quantity."""

    def __init__(self, reference: int, quantity: int) -> None:
        """Store the product reference and the quantity unchanged."""
        self.reference, self.quantity = reference, quantity
def make_product_catalog() -> list[Product]:
    """Return five products named A to E, priced from 10.0 in steps of 10.5."""
    # 10.0 + 10.5 * index equals the running sum exactly: every value is a
    # multiple of 0.5, which binary floats represent without rounding.
    return [
        Product(index, chr(65 + index), 10.0 + 10.5 * index) for index in range(5)
    ]
def make_order(catalog: list[Product]) -> list[OrderLine]:
    """Return one order line per catalog product, with growing quantities."""
    lines: list[OrderLine] = []
    amount = 1
    for entry in catalog:
        # Each quantity is the previous one times 2.5, truncated to an int.
        amount = int(2.5 * amount)
        lines.append(OrderLine(entry.reference, amount))
    return lines
def save_new(document: Document, name: str) -> None:
    """Store ``document`` under ``OUTPUT_DIR`` with the file name ``name``."""
    out_file = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_file)
    document.save(out_file, pretty=True)
def create_header_cell_style(doc: Document) -> str:
    """Insert a bordered, grey-background cell style and return its name."""
    # The same border string is used on all four sides of the cell.
    edge = make_table_cell_border_string(thick="0.03cm", color="black")
    header_style = create_table_cell_style(
        color="black",
        background_color=(210, 210, 210),
        border_right=edge,
        border_left=edge,
        border_bottom=edge,
        border_top=edge,
    )
    return doc.insert_style(style=header_style, automatic=True)
def add_top_content(doc: Document, catalog: list[Product]) -> None:
    """Append the titles, an intro paragraph and the product list to the body."""
    content = doc.body
    content.append(Header(1, "Basic commercial document"))
    content.append(Header(2, "Available products"))
    content.append(Paragraph("Here the list:"))

    # The products are shown as an odfdo.List, one item per product.
    bullet_list = List()
    content.append(bullet_list)
    for entry in catalog:
        label = f"{entry.name:<10}, price: {entry.price:.2f} €"
        bullet_list.append(ListItem(label))
def add_order_table(
    doc: Document, catalog: list[Product], order: list[OrderLine]
) -> None:
    """Append a "Your order" header followed by the order table."""
    content = doc.body
    content.append(Header(2, "Your order"))
    # The header-row style is inserted in the document before the table is built.
    header_style = create_header_cell_style(doc)
    content.append(make_order_table(catalog, order, header_style))
def _currency_cell(amount) -> Cell:
    """Return a float Cell holding `amount`, displayed in euros with 2 decimals."""
    cell = Cell()
    cell.set_value(
        amount,
        text=f"{amount:.2f} €",
        currency="EUR",
        cell_type="float",
    )
    return cell


def make_order_table(
    catalog: list[Product],
    order: list[OrderLine],
    style_name: str,
) -> Table:
    """Build the order table.

    Layout: a header row, one row per order line (name, unit price,
    quantity, amount), an empty spanned row, a "Total:" row and a
    "Total with tax:" row.

    Args:
        catalog: products, indexed by the ``reference`` of each order line.
        order: order lines providing ``reference`` and ``quantity``.
        style_name: name of the cell style applied to the header row.

    Returns:
        The populated Table, named "Table".
    """
    table = Table("Table")

    # Header of table
    row = Row()
    row.set_values(["Product", "Price", "Quantity", "Amount"])
    table.set_row("A1", row)
    # or: table.set_row(0, row)

    # Add a row for each order line, right below the header.
    # row_number keeps the index of the last written row (0 if no order line).
    row_number = 0
    for row_number, line in enumerate(order, start=1):
        product = catalog[line.reference]
        row = Row()
        row.set_value("A", product.name)
        # or : row.set_value(0, product.name)
        row.set_cell("B", _currency_cell(product.price))
        # or : row.set_cell(1, cell)
        row.set_value("C", line.quantity)
        # or : row.set_value(2, line.quantity)
        row.set_cell("D", _currency_cell(product.price * line.quantity))
        table.set_row(row_number, row)

    # Total lines
    # add a merged empty row
    row = Row()
    row_number += 1
    table.set_row(row_number, row)
    table.set_span((0, row_number, 3, row_number))

    # compute total line
    row = Row()
    row_number += 1
    row.set_value(0, "Total:")
    # Column 3 currently holds: header, the amounts, then the empty row;
    # [1:-1] keeps only the amounts.
    total = sum(table.get_column_values(3)[1:-1])
    # note: total is a Decimal
    row.set_cell(3, _currency_cell(total))
    table.set_row(row_number, row)
    # merge the 3 first columns for this row:
    table.set_span((0, row_number, 2, row_number), merge=True)

    # compute VAT line
    row = Row()
    row_number += 1
    row.set_value(0, "Total with tax:")
    total_vat = float(total) * (1 + TAX_RATE)
    row.set_cell(3, _currency_cell(total_vat))
    table.set_row(row_number, row)
    table.set_span((0, row_number, 2, row_number), merge=True)

    # Let's add some style on header row: each restyled cell is written back
    # into the row, then the row is written back into the table.
    row = table.get_row(0)
    for cell in row.traverse():
        cell.style = style_name
        row.set_cell(x=cell.x, cell=cell)
    table.set_row(row.y, row)
    return table
def generate_commercial(catalog: list[Product], order: list[OrderLine]) -> Document:
    """Generate a text Document with the product list and the order table in it."""
    commercial = Document("text")
    add_top_content(commercial, catalog)
    add_order_table(commercial, catalog, order)
    return commercial
def main() -> None:
    """Build the catalog and the order, generate the document, check and save it."""
    catalog = make_product_catalog()
    purchase = make_order(catalog)
    result = generate_commercial(catalog, purchase)
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Check a few cells of the generated table (runs only in the test suite)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    table = document.body.get_table(name="Table")
    assert isinstance(table, Table)
    expected = {
        "A1": "Product",
        "A2": "Product A",
        "A8": "Total:",
        "B1": "Price",
        "C1": "Quantity",
        "C2": 2,
        "D1": "Amount",
    }
    for coord, value in expected.items():
        assert table.get_cell(coord).value == value
if __name__ == "__main__":
main()
How to add a table to a document
Minimal example of how to add a table to a text document.
recipes/how_to_add_a_table_to_a_document.py
#!/usr/bin/env python
"""Minimal example of how to add a table to a text document."""
import os
from odfdo import Document, Header, Paragraph, Table
_DOC_SEQUENCE = 55
def generate_document() -> Document:
    """Return a text document holding a heading, a paragraph and a 3x3 table."""
    document = Document("text")
    content = document.body
    content.append(Header(1, "Tables"))
    content.append(Paragraph("A 3x3 table:"))
    # Creating a table :
    content.append(Table("Table name", width=3, height=3))
    return document
def main() -> None:
    """Generate the document and run the test-suite check on it."""
    test_unit(generate_document())
def test_unit(document: Document) -> None:
    """Check the size of the first table (runs only in the test suite)."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        first_table = document.body.get_table(0)
        assert first_table.size == (3, 3)
if __name__ == "__main__":
main()
Create a text document from plain text with layout
Create a text document with custom styles. In this recipe, the styles are created from their XML definition.
Steps:
-
Remove standard styles from the document,
-
set some styles grabbed from a styles.xml ODF file (or generated),
-
insert plain “python” text, containing some \t, \n, and spaces.
recipes/create_a_text_document_from_plain_text_with_layout.py
#!/usr/bin/env python
"""Create a text document with custom styles. In this recipe, the styles
are created from their XML definition.
Steps:
- Remove standard styles from the document,
- set some styles grabed from a styles.xml ODF file (or generated),
- insert plain "python" text, containing some \t , \n, and spaces.
"""
import os
from pathlib import Path
from odfdo import Document, Element, Paragraph, Style
_DOC_SEQUENCE = 60
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled2"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Store the recipe result `document` as OUTPUT_DIR / `name`."""
    out_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", out_path)
    document.save(out_path, pretty=True)
def add_content(document: Document) -> None:
    """Fill the document body with styled paragraphs built from plain text."""
    # Some plain text :
    text_1 = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )
    text_2 = (
        "Vestibulum "
        "ante "
        "ipsum primis\n"
        "in faucibus orci luctus et ultrices "
        "posuere cubilia Curae; Aliquam nibh."
    )
    text_3 = (
        "Duis semper. \n\tDuis arcu massa,"
        " \n\t\tscelerisque vitae, \n"
        "\t\t\tconsequat in, \n"
        "\t\t\t\tpretium a, enim. \n"
        "\t\t\t\t\tPellentesque congue. \n"
        "Ut in risus volutpat libero pharetra "
        "tempor. Cras vestibulum bibendum augue."
        "Praesent egestas leo in pede. Praesent "
        "blandit odio eu enim. Pellentesque sed"
    )
    body = document.body

    def add_separator() -> None:
        # An empty paragraph with the "line" style is used between the samples.
        body.append(Paragraph(style="line"))

    # By default, paragraph text is added in "plain text" mode, so tabs or
    # line breaks are translated into the appropriate ODF structure.
    body.append(Paragraph(text_1, style="description"))
    add_separator()

    body.append(Paragraph(text_2, style="smallserif"))
    add_separator()

    with_spans = Paragraph("A: " + text_3, style="description")
    # span offset become complex after inserting <CR> and <TAB> in a text
    with_spans.set_span("bolder", offset=5, length=6)  # TEXT position 5, length 6
    with_spans.set_span("bolder", offset=18, length=4)  # TEXT position 18, length 4
    # no length given: from TEXT position 49 to the end of the text bloc
    with_spans.set_span("bolder", offset=49)
    with_spans.set_span("bolder", regex=r"Praes\w+\s\w+")  # regex: Praes. + next word
    body.append(with_spans)
    add_separator()

    # it is possible to add the content without the original layout (\n, tab, spaces)
    body.append(Paragraph("B: " + text_3, style="description", formatted=False))
    add_separator()

    # text can also be append after paragraph creation
    filled_later = Paragraph(style="description")
    filled_later.append("C: " + text_3)
    body.append(filled_later)
def add_styles(document: Document) -> None:
    """Replace the document styles by custom ones built from XML definitions.

    All styles are created first, then the existing styles are deleted and
    the new ones inserted: font faces, page layout, master page, then the
    paragraph styles and the "bolder" text style.
    """
    # Element is the base class of all odfdo classes.
    # Element.from_tag permits the creation of any ODF XML tag

    # some font styles :
    font_faces = [
        Element.from_tag(
            '<style:font-face style:name="OpenSymbol" svg:font-family="OpenSymbol"/>'
        ),
        Element.from_tag(
            '<style:font-face style:name="Liberation Serif" '
            'svg:font-family="Liberation Serif" '
            'style:font-family-generic="roman" '
            'style:font-pitch="variable"/>'
        ),
        Element.from_tag(
            '<style:font-face style:name="Liberation Sans" '
            'svg:font-family="Liberation Sans" '
            'style:font-family-generic="swiss" '
            'style:font-pitch="variable"/>'
        ),
    ]

    # page layout style (changing margin)
    page_layout = Element.from_tag(
        '<style:page-layout style:name="MyLayout">'
        '<style:page-layout-properties fo:page-width="21.00cm" '
        'fo:page-height="29.70cm" style:num-format="1" '
        'style:print-orientation="portrait" fo:margin-top="1.7cm" '
        'fo:margin-bottom="1.5cm" fo:margin-left="1.6cm" '
        'fo:margin-right="1.6cm" style:writing-mode="lr-tb" '
        'style:footnote-max-height="0cm"><style:footnote-sep '
        'style:width="0.018cm" style:distance-before-sep="0.10cm" '
        'style:distance-after-sep="0.10cm" style:line-style="solid" '
        'style:adjustment="left" style:rel-width="25%" '
        'style:color="#000000"/> </style:page-layout-properties>'
        "<style:footer-style> "
        '<style:header-footer-properties fo:min-height="0.6cm" '
        'fo:margin-left="0cm" fo:margin-right="0cm" '
        'fo:margin-top="0.3cm" style:dynamic-spacing="false"/> '
        "</style:footer-style></style:page-layout>"
    )

    # master style, using the precedent layout for the actual document
    master_page = Element.from_tag(
        '<style:master-page style:name="Standard" '
        'style:page-layout-name="MyLayout"><style:footer>'
        '<text:p text:style-name="Footer"> '
        "<text:tab/><text:tab/><text:page-number "
        'text:select-page="current"/> / <text:page-count '
        'style:num-format="1">15</text:page-count>'
        "</text:p></style:footer> "
        "</style:master-page>"
    )

    # some footer
    footer = Element.from_tag(
        '<style:style style:name="Footer" '
        'style:family="paragraph" style:class="extra" '
        'style:master-page-name="">'
        '<style:paragraph-properties style:page-number="auto" '
        'text:number-lines="false" text:line-number="0">'
        "<style:tab-stops>"
        '<style:tab-stop style:position="8.90cm" '
        'style:type="center"/>'
        '<style:tab-stop style:position="17.80cm" style:type="right"/>'
        "</style:tab-stops>"
        "</style:paragraph-properties>"
        "<style:text-properties "
        'style:font-name="Liberation Sans" '
        'fo:font-size="7pt"/></style:style>'
    )

    # some text style using Liberation Sans font
    description = Element.from_tag(
        '<style:style style:name="description" '
        'style:family="paragraph" '
        'style:class="text" style:master-page-name="">'
        "<style:paragraph-properties "
        'fo:margin="100%" fo:margin-left="0cm" fo:margin-right="0cm" '
        'fo:margin-top="0.35cm" fo:margin-bottom="0.10cm" '
        'style:contextual-spacing="false" '
        'fo:text-indent="0cm" '
        'style:auto-text-indent="false" '
        'style:page-number="auto"/>'
        "<style:text-properties "
        'style:font-name="Liberation Sans" '
        'fo:font-size="11pt"/>'
        "</style:style>"
    )

    # some text style using Liberation Serif font
    small_serif = Element.from_tag(
        '<style:style style:name="smallserif" '
        'style:family="paragraph" style:class="text">'
        '<style:paragraph-properties fo:margin="100%" '
        'fo:margin-left="1.20cm" '
        'fo:margin-right="0cm" fo:margin-top="0cm" '
        'fo:margin-bottom="0.10cm" '
        'style:contextual-spacing="false" '
        'fo:text-indent="0cm" '
        'style:auto-text-indent="false"/>'
        '<style:text-properties style:font-name="Liberation Serif" '
        'fo:font-size="9pt" '
        'fo:font-weight="normal"/>'
        "</style:style>"
    )

    # some style to have stylish line in text
    line = Element.from_tag(
        '<style:style style:name="line" '
        'style:family="paragraph" style:class="text">'
        '<style:paragraph-properties fo:margin="100%" '
        'fo:margin-left="0cm" '
        'fo:margin-right="0cm" fo:margin-top="0cm" '
        'fo:margin-bottom="0.15cm" '
        'style:contextual-spacing="false" fo:text-indent="0cm" '
        'style:auto-text-indent="false" fo:padding="0cm" '
        'fo:border-left="none" '
        'fo:border-right="none" fo:border-top="none" '
        'fo:border-bottom="0.06pt solid #000000"/>'
        '<style:text-properties style:font-name="Liberation Sans" '
        'fo:font-size="9pt"/>'
        "</style:style>"
    )

    # some odfdo generated style (for bold Span)
    bolder = Style("text", name="bolder", bold=True)

    # remove default styles
    document.delete_styles()

    # add our styles, in the same order as they were defined
    for font_face in font_faces:
        document.insert_style(font_face, default=True)
    document.insert_style(page_layout, automatic=True)
    for style in (master_page, footer, description, small_serif, line, bolder):
        document.insert_style(style)
def generate_document() -> Document:
    """Return a text document with custom styles and the sample content."""
    styled = Document("text")
    styled.body.clear()
    # Styles first, then the content that refers to them by name.
    for step in (add_styles, add_content):
        step(styled)
    return styled
def main() -> None:
    """Generate the styled document, check it, then save it as TARGET."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Check style names and paragraphs of the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    names = {style.name for style in document.get_styles()}
    assert len(names) == 11
    assert {"bolder", "description", "line", "smallserif"} <= names
    all_paragraphs = document.body.paragraphs
    assert len(all_paragraphs) == 9
    first = all_paragraphs[0]
    assert "Lorem ipsum dolor" in str(first)
    assert first.style == "description"
if __name__ == "__main__":
main()
Add a custom footer to a text document
Minimal example of setting a page footer using Style.set_page_footer().
Note: the created footer uses the current footer style, to change that footer style, use the method set_footer_style() on the ‘page-layout’ style family.
recipes/add_a_custom_footer_to_a_text_document.py
#!/usr/bin/env python
"""Minimal example of setting a page footer using Style.set_page_footer().
Note: the created footer uses the current footer style, to change that
footer style, use the method set_footer_style() on the 'page-layout'
style family.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph, Tab, VarPageNumber
_DOC_SEQUENCE = 62
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled4"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save the recipe result under OUTPUT_DIR, creating the directory first."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    saved_as = OUTPUT_DIR / name
    print("Saving:", saved_as)
    document.save(saved_as, pretty=True)
def make_document() -> Document:
    """Generate a short document (title + text) with a two-line page footer."""
    text_1 = (
        "Lorem ipsum dolor sit amet,\n\t"
        "consectetuer adipiscing elit.\n\tSed"
        "non risus.\n\tSuspendisse lectus tortor,\n"
        "ndignissim sit amet, \nadipiscing nec,"
        "\nultricies sed, dolor.\n\n"
        " Cras elementum ultrices diam. Maecenas ligula massa,"
        "varius a,semper congue, euismod non,"
        " mi. Proin porttitor, orci nec nonummy"
        "molestie, enim est eleifend mi,"
        " non fermentum diam nisl sit amet erat."
    )
    document = Document("text")
    content = document.body
    content.clear()
    content.append(Header(1, "Some Title"))
    content.append(Paragraph(text_1))

    # looking for the current "master-page" style, it is probably
    # named "Standard". If not found, search with something like:
    # print([s for s in document.get_styles() if s.family == "master-page"])
    master = document.get_style("master-page", "Standard")

    # The footer can be a Paragraph or a list of Paragraphs:
    first_line = Paragraph("\tA first footer line")
    second_line = Paragraph("Second line")
    # second line: text, two tabs, the page number, then a final dot
    for part in (Tab(), Tab(), VarPageNumber(), "."):
        second_line.append(part)
    master.set_page_footer([first_line, second_line])

    # important: insert again the modified style
    document.insert_style(master)
    return document
def main() -> None:
    """Build the document with its footer, check it, then save it as TARGET."""
    with_footer = make_document()
    test_unit(with_footer)
    save_new(with_footer, TARGET)
def test_unit(document: Document) -> None:
    """Check the "Standard" master page footer content (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    from odfdo import Style

    masters = [s for s in document.get_styles() if s.family == "master-page"]
    assert len(masters) >= 1
    page_style = document.get_style("master-page", "Standard")
    assert isinstance(page_style, Style)
    serialized = page_style.get_page_footer().serialize()
    for fragment in ("A first footer", "Second line"):
        assert fragment in serialized
if __name__ == "__main__":
main()
How to add a picture to a text document
Create an empty text document and add a picture in a frame.
recipes/how_to_add_a_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame.
"""
from pathlib import Path
from odfdo import Document, Frame, Paragraph
_DOC_SEQUENCE = 65
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
IMAGE = DATA / "newlogo.png"
def save_new(document: Document, name: str) -> None:
    """Save a recipe result Document as OUTPUT_DIR / `name`."""
    target_path = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", target_path)
    document.save(target_path, pretty=True)
def main() -> None:
    """Create a text document containing one image frame, then save it."""
    document = Document("text")
    body = document.body
    # IMAGE is already built from DATA; joining DATA again ("DATA / IMAGE")
    # only gives the right path when DATA is absolute, and duplicates the
    # "data" prefix otherwise.
    image_path = str(IMAGE)
    # add the image file to the document, keep the returned URI for the frame
    uri = document.add_file(image_path)
    image_frame = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("5cm", "10cm"),
    )
    # put image frame in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame)
    body.append(paragraph)
    save_new(document, TARGET)
if __name__ == "__main__":
main()
How to add a right aligned picture to a text document
Create an empty text document and add a picture in a frame, aligned to the right or to the left.
Aligning an image requires applying a style to the frame. To do this, use the default frame position style and customize it. The frame position style allows you to choose alignment relative to the paragraph (default) or the page.
recipes/how_to_add_a_right_aligned_picture_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a picture in a frame,
aligned to the right or to the left.
Aligning an image requires applying a style to the frame. To do
this, use the default frame position style and customize it. The
frame position style allows you to choose alignment relative to
the paragraph (default) or the page.
"""
import os
from pathlib import Path
from odfdo import Document, Frame, Paragraph, default_frame_position_style
_DOC_SEQUENCE = 66
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_picture_right"
TARGET = "document.odt"
DATA = Path(__file__).parent / "data"
IMAGE = DATA / "newlogo.png"
def save_new(document: Document, name: str) -> None:
    """Write the recipe result to OUTPUT_DIR / `name` (directory created if missing)."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    result_path = OUTPUT_DIR / name
    print("Saving:", result_path)
    document.save(result_path, pretty=True)
def make_document() -> Document:
    """Generate a document containing two instances of an image,
    align one left, the other right.
    """
    document = Document("text")

    # add an image to the document, remember its URI
    # IMAGE is already built from DATA; joining DATA again ("DATA / IMAGE")
    # only gives the right path when DATA is absolute, and duplicates the
    # "data" prefix otherwise.
    image_path = str(IMAGE)
    uri = document.add_file(image_path)

    # add a frame style to the document, right alignment
    right_style = default_frame_position_style(
        name="right_frame", horizontal_pos="right"
    )
    document.insert_style(right_style)

    # add a frame style to the document, left alignment
    left_style = default_frame_position_style(
        name="left_frame", horizontal_pos="left", horizontal_rel="page"
    )
    document.insert_style(left_style)

    # make the image frames using previous informations
    image_frame_right = Frame.image_frame(
        uri,
        size=("6cm", "4cm"),
        position=("0cm", "5cm"),
        style=right_style.name,
    )
    image_frame_left = Frame.image_frame(
        uri,
        size=("9cm", "6cm"),
        position=("0cm", "12cm"),
        style=left_style.name,
    )

    # put image frame in a paragraph:
    paragraph = Paragraph("")
    paragraph.append(image_frame_right)
    paragraph.append(image_frame_left)
    document.body.append(paragraph)
    return document
def main() -> None:
    """Build the document with both pictures, check it, then save it as TARGET."""
    pictures = make_document()
    test_unit(pictures)
    save_new(pictures, TARGET)
def test_unit(document: Document) -> None:
    """Check that both frame styles are in the document (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    graphic_styles = [s for s in document.get_styles() if s.family == "graphic"]
    assert len(graphic_styles) >= 2
    names = [s.name for s in document.get_styles("graphic")]
    for expected in ("right_frame", "left_frame"):
        assert expected in names
if __name__ == "__main__":
main()
How to add a title to a text document
Minimal example of how to add a Header of first level to a text document.
recipes/how_to_add_a_title_to_a_text_document.py
#!/usr/bin/env python
"""Minimal example of how to add a Header of first level to a text document."""
import os
from odfdo import Document, Header
_DOC_SEQUENCE = 67
def generate_document() -> Document:
    """Return a text document to which a level 1 Header "The Title" is appended."""
    titled = Document("text")
    heading = Header(1, "The Title")
    titled.body.append(heading)
    return titled
def main() -> None:
    """Generate the document and run the test-suite check on it."""
    test_unit(generate_document())
def test_unit(document: Document) -> None:
    """Check the text of the first header (runs only in the test suite)."""
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        heading = document.body.get_header(0)
        assert str(heading).strip() == "The Title"
if __name__ == "__main__":
main()
Accessing a single element
Example of methods and properties to analyse a document.
These methods return a single element (or None):
- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
recipes/accessing_a_single_element.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.
These methods return a single element (or None):
- `body.get_note(position)`
- `body.get_paragraph(position)`
- `body.get_header(position)`
"""
# Expected result on stdout:
# - Content of the first footnote:
# 1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman)
# (2003). Don't Panic: Douglas Adams and the "Hitchhiker's Guide to the
# Galaxy". Titan Books. pp 144-145. ISBN 1-84023-742-2.
#
# - Content of the paragraph with the word 'Fish'
# In So Long, and Thanks for All the Fish (published in 1984), Arthur
# returns home to Earth, rather surprisingly since it was destroyed when
# he left. He meets and falls in love with a girl named
# [Fenchurch](http://en.wikipedia.org/w/index.php?title=Minor_characters_from_The_Hitchhiker%27s_Guide_to_the_Galaxy%23Fenchurch), and discovers this Earth is a replacement provided by the [dolphin](http://en.wikipedia.org/w/index.php?title=Dolphin)s in their Save the Humans campaign. Eventually he rejoins Ford, who claims to have saved the Universe in the meantime, to hitch-hike one last time and see God's Final Message to His Creation. Along the way, they are joined by Marvin, the Paranoid Android, who, although 37 times older than the universe itself (what with time
# travel and all), has just enough power left in his failing body to read
# the message and feel better about it all before expiring.
#
# - Content of the first Title:
# The Hitchhiker's Guide to the Galaxy
#
# - Content of the last Title:
# Official sites
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 70
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Open the document given as first CLI argument, else DATA / SOURCE."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def method_demo(document: Document) -> None:
    """Show some method examples: print a note, a paragraph and two headers."""
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Accessing a single element
    # The get_xxx methods used below select one element with a criterion
    # such as its position or (a regular expression on) its content.
    print("- Content of the first footnote:")
    first_note = body.get_note(position=0)
    print(str(first_note))
    print()

    print("- Content of the paragraph with the word 'Fish'")
    fish_paragraph = body.get_paragraph(content="Fish")
    print(str(fish_paragraph))

    print("- Content of the first Title:")
    first_title = body.get_header(position=0)
    print(str(first_title))

    print("- Content of the last Title:")
    last_title = body.get_header(position=-1)
    print(str(last_title))
def main() -> None:
    """Load the source document, run the demo, then the test-suite check."""
    source_document = read_source_document()
    method_demo(source_document)
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Check the text of the elements shown by the demo (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    body = document.body
    note_text = str(body.get_note(position=0))
    assert note_text.startswith(
        "1. [Gaiman, Neil](http://en.wikipedia.org/w/index.php?title=Neil_Gaiman) (2003)"
    )
    fish_text = str(body.get_paragraph(content="Fish"))
    assert fish_text.endswith("all before expiring.\n")
    first_title = str(body.get_header(position=0))
    assert first_title.startswith("The Hitchhiker's Guide")
    last_title = str(body.get_header(position=-1))
    assert last_title.startswith("Official sites")
if __name__ == "__main__":
main()
Accessing a list of elements
Example of methods and properties to analyse a document.
These methods or properties return a list of elements:
- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
recipes/accessing_a_list_of_elements.py
#!/usr/bin/env python
"""Example of methods and properties to analyse a document.
These methods or properties return a list of elements:
- `body.headers`
- `body.images`
- `body.paragraphs`
- `body.get_links()`
- `body.get_notes()`
- `body.tables`
- `body.get_paragraphs(content)`
"""
# Expected result on stdout:
# 96 get methods are available
# number of headings: 29
# number of images stored: 0
# number of paragraphs: 175
# number of links (URLs): 352
# number of footnotes: 49
# number of tables: 0
# Paragraphs with 'Fish': 4
# Paragraphs with 'answer' and '42': 1
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 75
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Return the Document named on the command line, or the default sample."""
    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else DATA / SOURCE
    return Document(source)
def analysis(document: Document) -> dict[str, int]:
    """Return some statistics about the document, as counts by name."""
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Accessing a list of elements
    # Should you need to access all elements of a kind, there are the
    # get_xxxs methods, where xxx can be paragraph, heading, list, table, ...
    # Methods without parameters are accessible through properties.
    return {
        # occurrences of "get_" among the attribute names of the body
        "methods": " ".join(dir(body)).count("get_"),
        # Some examples, that you can check against actual content of the
        # odt file:
        "headings": len(body.headers),
        "images": len(body.images),
        "paragraphs": len(body.paragraphs),
        "links": len(body.get_links()),
        "footnotes": len(body.get_notes()),
        # Our sample document has no table:
        "tables": len(body.tables),
        # get_paragraphs() can filter on the content with a regular expression.
        "fish": len(body.get_paragraphs(content=r"Fish")),
        "answer": len(body.get_paragraphs(content=r"answer.*42")),
    }
def display_analysis(stats: dict[str, int]) -> None:
    """Print one line per statistic on stdout, in a fixed order."""
    layout = (
        ("{} get methods are available", "methods"),
        ("number of headings: {}", "headings"),
        ("number of images stored: {}", "images"),
        ("number of paragraphs: {}", "paragraphs"),
        ("number of links (URLs): {}", "links"),
        ("number of footnotes: {}", "footnotes"),
        ("number of tables: {}", "tables"),
        ("Paragraphs with 'Fish': {}", "fish"),
        ("Paragraphs with 'answer' and '42': {}", "answer"),
    )
    for template, key in layout:
        print(template.format(stats[key]))
def main() -> None:
    """Load the source document, compute its statistics, print and check them."""
    stats = analysis(read_source_document())
    display_analysis(stats)
    test_unit(stats)
def test_unit(stats: dict[str, int]) -> None:
    """Compare the statistics with the expected counts (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    expected = {
        "methods": 96,
        "headings": 29,
        "images": 0,
        "paragraphs": 175,
        "links": 352,
        "footnotes": 49,
        "tables": 0,
        "fish": 4,
        "answer": 1,
    }
    for key, count in expected.items():
        assert stats[key] == count
if __name__ == "__main__":
main()
Accessing other element from element like list
Accessing elements from element-like list.
Any fetched element is an XML tree context that can be queried, but only on the subtree it contains.
Here are quick examples of iteration on `Paragraphs` and `Lists` from the document.
recipes/accessing_other_element_from_element_like_list.py
#!/usr/bin/env python
"""Accessing elements from element-like list.
Any fetched element is a XML tree context that can be queried, but only on the subtree it
contains. Here are quick examples of iteration on `Paragraphs` and `Lists` from the document.
"""
# Expected result on stdout:
# Number of available lists in the document: 5
#
# The 4th list contains 9 paragraphs
#
# 1 : [BBC Cult website](http://www.bbc.co.uk/cult/hitchhikers/),
# official website for the [TV show version](http://en.wikipedia.org/w/index.php?title=The_Hitchhiker%27s_Guide_to_the_Galaxy_%28TV_series%29)
# (includes information, links and downloads)
#
# 2 : [BBC Radio 4 website for the 2004-2005
# series](http://www.bbc.co.uk/radio4/hitchhikers/)
#
# 3 : [Official Movie Site](http://hitchhikers.movies.go.com/)
#
# 4 : [The Hitchhiker's Guide to the Galaxy
# (2005 movie)](http://www.imdb.com/title/tt0371724/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 5 : [The Hitch Hikers Guide to the Galaxy
# (1981 TV series)](http://www.imdb.com/title/tt0081874/)at the
# [Internet Movie Database](http://en.wikipedia.org/w/index.php?title=Internet_Movie_Database)
#
# 6 : [h2g2](http://www.bbc.co.uk/h2g2/guide/)
#
# 7 : [Encyclopedia of Television](http://www.museum.tv/archives/etv/H/htmlH/hitch-hickers/hitch-hickers.htm)
#
# 8 : [British Film Institute Screen Online](http://www.screenonline.org.uk/tv/id/560180/index.html)
# page devoted to the TV series
#
# 9 : [DC Comics H2G2 site](http://www.dccomics.com/graphic_novels/?gn=1816)
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 80
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Return the source Document: first CLI argument if any, else DATA / SOURCE."""
    if len(sys.argv) > 1:
        return Document(sys.argv[1])
    return Document(DATA / SOURCE)
def analyse_list(document: Document) -> None:
    """Print the number of lists, then the paragraphs of the list at position 4."""
    # The body object is an XML element from which we can access one or several
    # other elements we are looking for.
    body = document.body

    # Any element is a context for navigating but only on the subtree it
    # contains. Just like the body was, but since the body contains all content,
    # we didn't see the difference.
    # Let's try the lists:
    print("Number of available lists in the document:", len(body.lists))
    print()

    list4 = body.get_list(position=4)
    print(f"The 4th list contains {len(list4.paragraphs)} paragraphs")
    print()

    # Now print the list content, numbered from 1
    for number, paragraph in enumerate(list4.paragraphs, start=1):
        print(number, ":", paragraph)
def main() -> None:
    """Load the source document, print the list analysis, then run the check."""
    source_document = read_source_document()
    analyse_list(source_document)
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Check the lists count and the list at position 4 (test suite only)."""
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    body = document.body
    list4 = body.get_list(position=4)
    items = list4.paragraphs
    assert len(body.lists) == 5
    assert len(list4.paragraphs) == 9
    first_item, last_item = str(items[0]), str(items[8])
    assert first_item.startswith("[BBC Cult website](http")
    assert last_item.startswith("[DC Comics H2G2 site](http")
if __name__ == "__main__":
main()
How to add a list to a text document
Create an empty text document and add a list.
recipes/how_to_add_a_list_to_a_text_document.py
#!/usr/bin/env python
"""Create an empty text document and add a list."""
import os
from pathlib import Path
# Lists are a class: List
from odfdo import Document, List
_DOC_SEQUENCE = 90
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_list"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document() -> Document:
    """Build a text document whose body contains a single three-item list."""
    document = Document("text")
    content = document.body
    content.clear()
    # The list factory accepts a Python list of strings and list items.
    items = List(["chocolate", "tea", "coffee"])
    content.append(items)
    return document
def main() -> None:
    """Generate the document, run the self-check, then save the result."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    expected = "- chocolate\n- tea\n- coffee"
    formatted = document.get_formatted_text()
    assert formatted.strip() == expected
if __name__ == "__main__":
main()
How to add a manual page break
Adding a manual page break to a text document.
Page breaks are built by a specific style. However, odfdo provides a PageBreak class to facilitate the inclusion of page breaks. This recipe illustrates the use of PageBreak and the underlying styling mechanism.
recipes/how_to_add_a_manual_page_break.py
#!/usr/bin/env python
"""Adding a manual page break to a text document.
Page breaks are built by a specific style. However, odfdo provides a PageBreak
class to facilitate the inclusion of page breaks. This recipe illustrates
the use of PageBreak and the underlying styling mechanism.
"""
import os
from pathlib import Path
from odfdo import Document, PageBreak, Paragraph, Style
_DOC_SEQUENCE = 95
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "page_break"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document() -> Document:
    """Build a text document showing two ways of inserting a page break.

    The first break uses the PageBreak element together with the style
    registered by ``add_page_break_style()``; the second one uses an empty
    paragraph carrying a custom style with ``fo:break-before`` set to "page".
    """
    document = Document()
    content = document.body
    content.clear()

    # Here is a simple way to insert a page break with the odfdo shortcuts:
    document.add_page_break_style()
    first_part = (
        Paragraph("First paragraph"),
        PageBreak(),
        Paragraph("Second paragraph"),
    )
    for element in first_part:
        content.append(element)

    # Here is a different way to insert a page break:
    break_style = Style("paragraph", name="page_break_before")
    break_style.set_properties({"fo:break-before": "page"})
    document.insert_style(break_style)
    content.append(Paragraph("", style="page_break_before"))
    content.append(Paragraph("Third paragraph"))
    return document
def main() -> None:
    """Generate the document, run the self-check, then save the result."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    serialized = document.body.serialize(pretty=True)
    expected_fragments = (
        "First paragraph",
        "Second paragraph",
        "Third paragraph",
        '<text:p text:style-name="odfdopagebreak"></text:p>',
        '<text:p text:style-name="page_break_before"></text:p>',
    )
    for fragment in expected_fragments:
        assert fragment in serialized
if __name__ == "__main__":
main()
Create a basic drawing
Insert a circle and a lot of lines (a fractal) in a text document.
recipes/create_a_basic_drawing.py
#!/usr/bin/env python
"""Insert a circle and a lot of lines (a fractal) in a text document."""
from __future__ import annotations
import cmath
import os
from pathlib import Path
from odfdo import Document, EllipseShape, Header, LineShape, Paragraph
_DOC_SEQUENCE = 100
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_drawing"
TARGET = "koch.odt"
CYCLES = 4 # beware, 5 is big, 6 is too big to display...
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def cm(x: float) -> str:
    """Format *x* with two decimals followed by the ``cm`` unit."""
    return format(x, ".2f") + "cm"
class Vector:
    """Segment between two points of the complex plane, with Koch splitting."""

    def __init__(self, a: float | complex, b: float | complex) -> None:
        self.a = a
        self.b = b

    def koch_split(self) -> list[Vector]:
        """Return the four consecutive segments replacing this one.

        The segment is cut at one third and two thirds of its length; the
        extra point is the midpoint shifted by the middle third multiplied
        by ``-i`` (a quarter turn).
        """
        start, end = self.a, self.b
        first_cut = start + 1.0 / 3.0 * (end - start)
        second_cut = start + 2.0 / 3.0 * (end - start)
        middle = 0.5 * (start + end)
        peak = middle + (second_cut - first_cut) * complex(0, -1)
        corners = [start, first_cut, peak, second_cut, end]
        return [Vector(p, q) for p, q in zip(corners, corners[1:])]

    def centimeter(self, index: int) -> tuple[str, str]:
        """Return one end of the segment as a pair of cm strings.

        Index 0 selects the start point ``a``; any other value selects the
        end point ``b``.
        """
        point = self.a if index == 0 else self.b
        return (cm(point.real), cm(point.imag))
def koch(vector_list: list[Vector], cycles: int = 2) -> list[Vector]:
    """Generate a Koch fractal.

    Every vector is replaced by the result of its ``koch_split()``, and the
    operation is repeated *cycles* times. When *cycles* is zero or negative
    the given list is returned as is.
    """
    current = vector_list
    remaining = cycles
    while remaining > 0:
        current = [piece for vector in current for piece in vector.koch_split()]
        remaining -= 1
    return current
def make_fractal_coords(
    side: float,
    vert_position: float,
) -> tuple[float | complex, list[Vector]]:
    """Return center and coordinates of a Koch fractal image.

    A triangle with sides of length *side* is built from an origin placed
    at ``((17 - side) / 2, vert_position)``, then every side is refined
    with ``koch()`` using CYCLES iterations. The center is the mean of the
    three corners.
    """
    # NOTE(review): 17 looks like the usable page width in cm, used to
    # center the figure horizontally - confirm.
    origin = complex((17 - side) / 2.0, vert_position)
    right = origin + complex(side, 0)
    apex = origin + cmath.rect(side, cmath.pi / 3)
    triangle = [
        Vector(origin, right),
        Vector(right, apex),
        Vector(apex, origin),
    ]
    center: float | complex = (origin + right + apex) / 3
    return center, koch(triangle, cycles=CYCLES)
def generate_document() -> Document:
    """Generate a text document holding a circle and a Koch fractal.

    The shapes (one ellipse and one line per fractal segment) are all
    appended to a single, initially empty, paragraph.
    """
    document = Document("text")
    content = document.body
    print("Making some Koch fractal")
    content.append(Header(1, "Some Koch fractal"))

    graphic_style = document.get_style("graphic")
    graphic_style.set_properties({"svg:stroke_color": "#0000ff"})
    graphic_style.set_properties(fill_color="#ffffcc")

    drawing = Paragraph("")
    content.append(drawing)

    # some computation of coordinates
    center, segments = make_fractal_coords(side=12.0, vert_position=8.0)

    # create a circle
    radius = 8.0
    corner = center - complex(radius, radius)
    drawing.append(
        EllipseShape(
            size=(cm(radius * 2), cm(radius * 2)),
            position=(cm(corner.real), cm(corner.imag)),
        )
    )

    # create a drawing with a lot of lines
    drawing.append(f"number of lines: {len(segments)}")
    for segment in segments:
        drawing.append(
            LineShape(p1=segment.centimeter(0), p2=segment.centimeter(1))
        )
    return document
def main() -> None:
    """Generate the document, run the self-check, then save the result."""
    result = generate_document()
    test_unit(result)
    save_new(result, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    paragraph_count = len(document.body.paragraphs)
    assert paragraph_count == 2
    line_count = len(document.body.get_draw_lines())
    assert line_count == 768
if __name__ == "__main__":
main()
Add private annotations to a document
Add non-printable annotations to a document.
Annotations are notes that do not appear in the document but typically on a side bar in a desktop application. So they are not printed.
recipes/add_private_annotations_to_a_document.py
#!/usr/bin/env python
"""Add not printable annotations to a document.
Annotations are notes that do not appear in the document but typically
on a side bar in a desktop application. So they are not printed.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph
_DOC_SEQUENCE = 110
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "annotated"
TARGET = "annotated_document.odt"
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def base_document() -> Document:
    """Generate a basic text document: one title, two sections of text."""
    preparations_text = (
        "Sous le consulat de Lucius Domitius et d'Appius Claudius, "
        "César, quittant les quartiers d'hiver pour aller en Italie, "
        "comme il avait coutume de le faire chaque année, ordonne aux "
        "lieutenants qu'il laissait à la tête des légions de construire, "
        "pendant l'hiver, le plus de vaisseaux qu'il serait possible, "
        "et de réparer les anciens."
    )
    britain_text = (
        "Cette île est de forme triangulaire ; l'un des côtés regarde "
        "la Gaule. Des deux angles de ce côté, l'un est au levant, "
        "vers le pays de Cantium, où abordent presque tous les vaisseaux "
        "gaulois ; l'autre, plus bas, est au midi. La longueur de ce côté "
        "est d'environ cinq cent mille pas. "
    )
    document = Document("text")
    content = document.body
    content.append(Header(1, "De la Guerre des Gaules - Livre V"))
    content.append(Header(2, "Préparatifs d'expédition en Bretagne"))
    content.append(Paragraph(preparations_text))
    content.append(Header(2, "La Bretagne"))
    content.append(Paragraph(britain_text))
    return document
def insert_annotation(document: Document) -> None:
    """Insert a non-printable annotation in a document.

    The annotation is attached to the paragraph of the body found with
    ``get_paragraph(content="consulat")``.
    """
    target = document.body.get_paragraph(content="consulat")
    # Annotations are inserted like notes but they are simpler.
    # Annotation arguments:
    #   after   => The word after which the annotation is inserted.
    #   body    => The annotation itself, at the end of the page.
    #   creator => The author of the annotation.
    #   date    => A datetime value, by default datetime.now().
    annotation_arguments = {
        "after": "Domitius",
        "body": "Talking about Lucius Domitius",
        "creator": "Luis",
    }
    target.insert_annotation(**annotation_arguments)
def main() -> None:
    """Build the base document, annotate it, self-check, then save."""
    annotated = base_document()
    insert_annotation(annotated)
    test_unit(annotated)
    save_new(annotated, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    found = document.body.get_annotations(creator="Luis")
    assert len(found) == 1
if __name__ == "__main__":
main()
Accessibility check on a document
Basic Accessibility test: check, for every picture in a document, if there is:
- a title (svg_title),
- a description (svg_description)
or, at least, some caption text.
See the test file planes.odt and the result of the script.
recipes/accessibility_check_on_a_document.py
#!/usr/bin/env python
"""Basic Accessibility test: check, for every picture in a document, if
there is:
- a title (svg_title),
- a description (svg_description)
or, at least, some caption text.
See test file `planes.odt` file and the result of the script.
"""
# Expected result on stdout:
# The document displays 3 pictures:
# - pictures with a title: 2
# - pictures with a description: 1
# - pictures with a caption: 0
# Image: 100000000000013B000000D345859F604DCE636A.jpg
# Name: graphics2, Title: Spitfire, general view, Description:Green spitfire in a hall, view from left front., Caption:None
# Image: 100000000000013B000000D3F908DA0A939D2F4B.jpg
# Name: graphics3, Title: Spitfire, detail, Description:None, Caption:None
# Image: 100000000000013B000000D375CEBFD6D7CB7CE9.jpg
# Name: graphics1, Title: None, Description:None, Caption:None
import os
import sys
from pathlib import Path
from typing import Any
from odfdo import Document
_DOC_SEQUENCE = 200
DATA = Path(__file__).parent / "data"
SOURCE = "planes.odt"
def read_source_document() -> Document:
    """Open the document to check.

    The first command line argument is used as the path when present,
    otherwise the sample file ``DATA / SOURCE`` is opened.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)
def accessibility_evaluator(document: Document) -> dict[str, Any]:
    """Count, over the images of the body, titles, descriptions and captions.

    Returns a dict with the keys:
    - "images": one printable report string per image,
    - "titles", "descriptions", "captions": number of images for which
      the corresponding value is truthy.
    """
    reports: list[str] = []
    title_count = 0
    description_count = 0
    caption_count = 0
    # We want the images of the document.
    for image in document.body.images:
        filename = image.url.rpartition("/")[2]
        frame = image.parent
        name = frame.name
        title = frame.svg_title
        description = frame.svg_description
        link = frame.parent
        # this part requires some ODF know how: the caption is read from
        # the "office:name" attribute when the frame is inside a "draw:a".
        if link.tag == "draw:a":
            caption = link.get_attribute("office:name")
        else:
            caption = None
        reports.append(
            f"Image: {filename}\n"
            f" Name: {name}, Title: {title}, "
            f"Description:{description}, Caption:{caption}"
        )
        if title:
            title_count += 1
        if description:
            description_count += 1
        if caption:
            caption_count += 1
    return {
        "images": reports,
        "titles": title_count,
        "descriptions": description_count,
        "captions": caption_count,
    }
def display_accessibilty(stats: dict[str, Any]) -> None:
    """Print the stats on stdout: a summary, a blank line, then each image."""
    summary = (
        f"The document displays {len(stats['images'])} pictures:",
        f" - pictures with a title: {stats['titles']}",
        f" - pictures with a description: {stats['descriptions']}",
        f" - pictures with a caption: {stats['captions']}",
    )
    for line in summary:
        print(line)
    print()
    for report in stats["images"]:
        print(report)
def main() -> None:
    """Read the document, compute and print the stats, run the self-check."""
    source_document = read_source_document()
    statistics = accessibility_evaluator(source_document)
    display_accessibilty(statistics)
    test_unit(statistics)
def test_unit(stats: dict[str, Any]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert len(stats["images"]) == 3
assert stats["titles"] == 2
assert stats["descriptions"] == 1
assert stats["captions"] == 0
if __name__ == "__main__":
main()
Add logo on presentation
Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size x,y at position x2,y2 on a number of slides in a
presentation slide deck.
recipes/add_logo_on_presentation.py
#!/usr/bin/env python
"""Insert an image (e.g. the logo of an event, organization or a Creative Commons
attribution) with size `x,y` at position `x2,y2` on a number of slides in a
presentation slide deck.
"""
import os
import sys
from pathlib import Path
# reading image size requires a graphic library
from PIL import Image
from odfdo import Document, Frame
_DOC_SEQUENCE = 250
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "add_logo"
TARGET = "presentation.odp"
DATA = Path(__file__).parent / "data"
SOURCE = "presentation_wo_logo.odp"
LOGO = DATA / "newlogo.png"
def read_source_document() -> Document:
    """Open the presentation to modify.

    The first command line argument is used as the path when present,
    otherwise the sample file ``DATA / SOURCE`` is opened.
    """
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def make_image_size(path: Path, size: float) -> tuple[str, str]:
    """Return the display size (width, height) of the image stored at *path*.

    The image is scaled, keeping its proportions, so that its largest
    dimension equals *size*. Both values are returned as strings with two
    decimals and the ``cm`` unit.
    """
    # The context manager closes the underlying file as soon as the
    # dimensions are read, instead of leaving it open.
    with Image.open(path) as image:
        width, height = image.size
    ratio = max(width / size, height / size)
    return (f"{width / ratio:.2f}cm", f"{height / ratio:.2f}cm")
def add_logo(presentation: Document) -> None:
    """Add the LOGO image, in its own frame, on every slide of a presentation."""
    display_size = make_image_size(LOGO, 4.0)
    slides_container = presentation.body
    logo_uri = presentation.add_file(LOGO)
    frame_options = {
        "image": logo_uri,
        "text": "",  # Text over the image object
        "size": display_size,  # Display size of image
        "anchor_type": "page",
        "page_number": None,
        "position": ("1.50cm", "1.50cm"),
        "style": None,
    }
    for slide in slides_container.get_draw_pages():
        # Create a frame for the image
        logo_frame = Frame.image_frame(**frame_options)
        logo_frame.svg_title = "New Logo"
        logo_frame.svg_description = "The new logo with blue background"
        slide.append(logo_frame)
def main() -> None:
    """Read the presentation, add the logo, self-check, then save."""
    presentation = read_source_document()
    add_logo(presentation)
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    slides = document.body.get_draw_pages()
    assert len(slides) == 11
    assert all(len(slide.get_images()) == 1 for slide in slides)
if __name__ == "__main__":
main()
Get pictures from document odt
Retrieve all the pictures embedded in an .odt file.
recipes/get_pictures_from_document_odt.py
#!/usr/bin/env python
"""Retrieve all the pictures embedded in an .odt file."""
import os
import sys
from pathlib import Path
from pprint import pformat
from odfdo import Document
_DOC_SEQUENCE = 260
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA)
# Remark: the document is badly made: the pictures are not displayed in the
# text, but are still inside the document!
SOURCE = "collection.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "found_pics"
def read_source_document() -> Document:
    """Open the document to read pictures from.

    The first command line argument is used as the path when present,
    otherwise the sample file ``DATA / SOURCE`` is opened.
    """
    arguments = sys.argv[1:]
    source = arguments[0] if arguments else DATA / SOURCE
    return Document(source)
def read_pictures(document: Document) -> list[Path]:
    """Copy the images of the document into OUTPUT_DIR and list that directory.

    The returned list is the sorted content of OUTPUT_DIR after the copy,
    so it includes any entry already present there.
    """
    parts = document.parts
    print("ODF parts of the document:")
    print(pformat(parts))
    print()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # images are DrawImage instances
    for draw_image in document.body.images:
        # The URL tells where the actual image content is stored in the
        # file; get_part() returns that content so that it can be copied
        # out of the .odt file.
        url = draw_image.url
        payload = document.get_part(url)
        target = OUTPUT_DIR / Path(url).name
        target.write_bytes(payload)
    extracted = sorted(OUTPUT_DIR.glob("*"))
    print(f"Picture files in {OUTPUT_DIR}:")
    for entry in extracted:
        print(entry.name)
    return extracted
def main() -> None:
    """Read the document, extract its pictures, run the self-check."""
    source_document = read_source_document()
    extracted_paths = read_pictures(source_document)
    test_unit(extracted_paths)
def test_unit(path_list: list[Path]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert len(path_list) == 9
if __name__ == "__main__":
main()
Change image in many documents
Change an image in many ODF files.
This recipe is suitable for the scenario where an organization is moving from one company logo to another and needs to replace the logo in several hundred existing documents.
recipes/change_image_in_many_documents.py
#!/usr/bin/env python
"""Change an image in many ODF files.
This recipe is suitable for the scenario where an organization
is moving from one company logo to another and needs to replace
the logo in several hundred existing documents.
"""
import os
from hashlib import sha256
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 270
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "new_logo"
DATA = Path(__file__).parent / "data"
OLD_PRESENTATIONS = DATA / "old_presentations"
OLD_LOGO = OLD_PRESENTATIONS / "oldlogo.png"
NEW_LOGO = DATA / "newlogo.png"
def save_modified(document: Document) -> None:
    """Save a modified Document in OUTPUT_DIR, keeping its file name."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / Path(document.path).name
    print("Saving:", destination)
    document.save(destination)
def footprint(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *content*."""
    hasher = sha256()
    hasher.update(content)
    return hasher.hexdigest()
def update_document_logo(
    path: Path,
    old_hash: str,
    new_content: bytes,
    stats: dict[str, int],
) -> None:
    """Replace the old logo inside the document stored at *path*.

    Every image part whose SHA-256 footprint equals *old_hash* is
    overwritten with *new_content*; the document is saved only when at
    least one part was replaced. The counters of *stats* ("files",
    "odf_files", "updated_files") are incremented in place.
    """
    stats["files"] += 1
    suffix = path.suffix.lower()
    if not suffix.startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # Best effort: whatever cannot be opened as a Document is skipped.
        return
    stats["odf_files"] += 1

    replaced = False
    for image in document.body.images:
        image_url = image.url
        if not image_url:
            continue
        try:
            current_content = document.get_part(image_url)
        except KeyError:
            print("- not found inside document:", path, end=" ")
            print(" image URL:", image_url)
            continue
        if footprint(current_content) != old_hash:
            continue
        document.set_part(image_url, new_content)
        replaced = True

    if not replaced:
        return
    save_modified(document)
    stats["updated_files"] += 1
def update_logos() -> dict[str, int]:
    """Update the logo image in all documents found under OLD_PRESENTATIONS.

    Returns the counters "files", "odf_files" and "updated_files".
    """
    counters: dict[str, int] = dict.fromkeys(
        ("files", "odf_files", "updated_files"), 0
    )
    old_hash = footprint(OLD_LOGO.read_bytes())
    # making the new image content: the new logo is added to a scratch
    # document, then read back as a part.
    scratch = Document("text")
    new_content = scratch.get_part(scratch.add_file(str(NEW_LOGO)))
    for candidate in OLD_PRESENTATIONS.glob("**/*"):
        update_document_logo(candidate, old_hash, new_content, counters)
    return counters
def main() -> None:
    """Update the logos, print the counters, run the self-check."""
    counters = update_logos()
    for line in (
        f"Files: {counters['files']}",
        f"ODF files: {counters['odf_files']}",
        f"Updated files: {counters['updated_files']}",
    ):
        print(line)
    test_unit(counters)
def test_unit(stats: dict[str, int]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert (stats["files"]) == 3
assert (stats["odf_files"]) == 2
assert (stats["updated_files"]) == 2
if __name__ == "__main__":
main()
Concatenate presentations
Concatenate several presentations (including presentations found in sub directories), possibly merge styles and images. Result for style may vary.
recipes/concatenate_presentations.py
#!/usr/bin/env python
"""Concatenate several presentations (including presentations found in sub
directories), possibly merge styles and images. Result for style may vary.
"""
import os
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 280
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "concatenate"
TARGET = "presentation.odp"
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def concatenate_presentations(path: Path) -> Document:
    """Return a presentation holding a copy of all presentations under *path*.

    Every ``.odp`` file found recursively is handed to
    ``add_presentation()``; a summary line is printed at the end.
    """
    merged = Document("presentation")
    merged.body.clear()
    merged.delete_styles()
    total = 0
    for total, odp_path in enumerate(path.glob("**/*.odp"), start=1):
        add_presentation(merged, odp_path)
    slide_count = len(merged.body.get_draw_pages())
    print(f"{total} presentations concatenated, {slide_count} slides.")
    return merged
def add_presentation(concat_presentation: Document, path: Path) -> None:
    """Copy the styles, slides and images of the .odp file at *path*.

    A file that cannot be opened as a Document is silently ignored.
    """
    try:
        source = Document(path)
    except Exception:
        return
    concat_presentation.merge_styles_from(source)

    # add all slides
    target_body = concat_presentation.body
    target_manifest = concat_presentation.manifest
    source_manifest = source.manifest
    source_slides = source.body.get_draw_pages()
    print(f"- {path.name} has {len(source_slides)} slides")
    for original_slide in source_slides:
        slide_copy = original_slide.clone
        # don't forget images: each one is declared in the destination
        # manifest and its content is copied as a part.
        for image in slide_copy.images:
            uri = image.url
            target_manifest.add_full_path(uri, source_manifest.get_media_type(uri))
            concat_presentation.set_part(uri, source.get_part(uri))
        # append slide, expecting nothing good about its final style
        target_body.append(slide_copy)
def main() -> None:
    """Concatenate the presentations of DATA, self-check, then save."""
    merged = concatenate_presentations(DATA)
    test_unit(merged)
    save_new(merged, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    slides = document.body.get_draw_pages()
    assert len(slides) == 38
if __name__ == "__main__":
main()
Make a presentation from pictures of a text document
Open a .odt file with pictures in it, find and analyse all the images, create a new .odp presentation, display all the pictures in the presentation, one image per frame.
recipes/make_a_presentation_from_pictures_of_a_text_document.py
#!/usr/bin/env python
"""Open a .odt file with pictures in it, find and analyse all the images,
create a new .odp presentation, display all the pictures in the presentation,
one image per frame.
"""
import io
import os
import sys
from pathlib import Path
# analyzing embedded image requires the Pillow library
from PIL import Image
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 285
DATA = Path(__file__).parent / "data"
SOURCE = DATA / "collection.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_images_in_odt"
TARGET = "presentation.odp"
def read_source_document() -> Document:
    """Return the source Document.

    The first command line argument is used as the path when present,
    otherwise the sample file SOURCE is opened.
    """
    try:
        source = sys.argv[1]
    except IndexError:
        # SOURCE already includes DATA; joining DATA again would duplicate
        # the directory whenever DATA is a relative path.
        source = SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
def embedded_image_ratio(href: str, content: bytes) -> float:
    """Return the width / height ratio of an image given as bytes.

    *href* is only used in the information line printed on stdout.
    """
    width, height = Image.open(io.BytesIO(content)).size
    ratio = float(width) / height
    print(f"Image {href}, size: {width}x{height}, ratio:{ratio:.2f}")
    return ratio
def compute_size(max_length: float, ratio: float) -> tuple[float, float]:
    """Return the (width, height) an image of the given width / height ratio
    gets when its largest side is limited to *max_length*."""
    # Start from a height of max_length, which is right for portrait and
    # square images.
    width, height = max_length * ratio, max_length
    if not ratio > 1.0:
        return width, height
    # Landscape image: scale both sides down so the width is the limit.
    return width / ratio, height / ratio
def generate_document(source: Document) -> Document:
    """Return a presentation document made from pictures read from
    the source document.

    For each image of the body of *source*, one slide is created holding a
    text frame with a legend and a frame displaying the image. The image
    content is copied into the presentation together with its manifest
    entry.
    """
    # Making of the output Presentation document:
    presentation = Document("presentation")
    # Presentation got a body in which elements are stored
    presentation_body = presentation.body
    presentation_body.clear()
    presentation_manifest = presentation.manifest

    # For each image, we create a page in the presentation and display the
    # image and some text on this frame.
    # First, get all image elements available in document:
    images_source = source.body.images
    manifest_source = source.manifest

    for image in images_source:
        # we use the get_part function from odfdo to get the actual content
        # of the images, with the URI link to the image as argument
        uri = image.url
        content: bytes = source.get_part(uri)  # actual image content
        name = uri.split("/")[-1]  # lets make a file name for image

        # Compute the display size of the image on the final page
        ratio = embedded_image_ratio(uri, content)
        # max size of the greatest side: 16 cm
        width, height = compute_size(16.0, ratio)

        # Create an underlying page for the image and the text
        page = DrawPage("page " + name)

        # Create a frame for the image
        image_frame = Frame.image_frame(
            image=uri,
            text="",  # Text over the image object
            size=(f"{width}cm", f"{height}cm"),  # Display size of image
            anchor_type="page",
            page_number=None,
            # A length is a number directly followed by its unit: no space.
            position=("3.5cm", "3.5cm"),
            style=None,
        )

        # Add some text object somewhere on the frame, with a text frame
        legend = f"Image {name} from Wikipedia document / {SOURCE.name}"
        text_frame = Frame.text_frame(
            legend,
            size=("26cm", "2cm"),
            position=("0.5cm", "0.5cm"),
            style="Standard",
            text_style="Standard",
        )

        # Append all the components, do not forget to add the actual image
        # file into the Picture global directory of the presentation file
        # with set_part
        page.append(text_frame)
        page.append(image_frame)
        presentation_body.append(page)

        # for the same operation from a local filesystem image, just use:
        # presentation_output.add_file(uri)
        media_type = manifest_source.get_media_type(uri)
        presentation_manifest.add_full_path(uri, media_type)
        # actually store the image content in the new document:
        presentation.set_part(uri, content)

    return presentation
def main() -> None:
    """Read the source, build the presentation, self-check, then save."""
    source_document = read_source_document()
    presentation = generate_document(source_document)
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    image_count = len(document.body.images)
    assert image_count == 9
    page_count = len(document.body.get_draw_pages())
    assert page_count == 9
if __name__ == "__main__":
main()
Make presentation from images
Create a presentation from some images in a given directory, where each image is put at the center of its own page, scaled to either the maximum available size, a preferred maximum size, or to cover the full page and lose some info.
recipes/make_presentation_from_images.py
#!/usr/bin/env python
"""Create a presentation from some images in a given directory,
where each image is put at the center of its own page, scaled to either
the maximum available size, a preferred maximum size, or to cover the full
page and lose some info.
"""
import os
from pathlib import Path
# analyzing embedded image need Pillow library
from PIL import Image
from odfdo import Document, DrawPage, Frame
_DOC_SEQUENCE = 286
IMAGES = Path(__file__).parent / "data" / "images"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_from_images"
TARGET = "presentation.odp"
MAX_SIZE = 15.0 # feel free to customize
CROP_SIZE = False # feel free to customize
# Size (in cm) of a slide : (default page-layout)
SLIDE_W, SLIDE_H = 28.0, 21.0 # 4/3 screen
# FIXME: this is the default page-layout.
# - Changing the style of the page-layout by program is not done in this script
# - an other way, merging with external page-layout/master-page requires
# extra files, out of the scope for this script.
def save_new(document: Document, name: str) -> None:
    """Save *document* under ``OUTPUT_DIR / name``.

    The output directory is created when missing and the destination path
    is printed before saving.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / name
    print("Saving:", destination)
    document.save(destination, pretty=True)
class ImageInfo:
    """Class to store information about an image.

    Principle:
    - original images are left unmodified by the script
    - only the size at which they should appear is computed
    - later, the display engine (say LibreOffice) will merge this display
      information with other information, like the size of the page
      (page-layout) and should act like a mask against the "big" cropped
      image.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        # Everything below stays None until adapt() succeeds.
        self.size = None  # (width, height) as reported by Pillow
        self.disp_w = self.disp_h = None  # display size, "<number>cm" strings
        self.pos_x = self.pos_y = None  # position, "<number>cm" strings

    def adapt(self) -> bool:
        """Compute the display size and position of the image.

        Returns False when the path is not a file or cannot be opened as
        an image, True once the display attributes are set. The scaling
        depends on the module settings:
        - MAX_SIZE set: the largest side is scaled to MAX_SIZE,
        - else CROP_SIZE set: the image covers the whole slide,
        - else: the image fits entirely inside the slide.
        The position centers the image on the slide.
        """
        if not self.path.is_file():
            return False
        try:
            # The context manager closes the file once the size is read.
            with Image.open(self.path) as image:
                self.size = image.size
        except OSError:
            # Not an image ?
            return False
        width, height = self.size
        if MAX_SIZE:
            ratio = max(width / MAX_SIZE, height / MAX_SIZE)
        elif CROP_SIZE:
            ratio = min(width / SLIDE_W, height / SLIDE_H)
        else:
            ratio = max(width / SLIDE_W, height / SLIDE_H)
        display_w = width / ratio
        display_h = height / ratio
        # ".2f" gives two decimals; the former "2f" only set a minimum
        # field width and printed six decimals.
        self.disp_w = f"{display_w:.2f}cm"
        self.disp_h = f"{display_h:.2f}cm"
        self.pos_x = f"{(SLIDE_W - display_w) / 2:.2f}cm"
        self.pos_y = f"{(SLIDE_H - display_h) / 2:.2f}cm"
        print(self.path.name, self.disp_w, self.disp_h)
        return True
def collect_images() -> list[ImageInfo]:
    """Return an ImageInfo for every entry found recursively under IMAGES
    for which ``adapt()`` succeeds."""
    candidates = (ImageInfo(path) for path in IMAGES.glob("**/*"))
    return [candidate for candidate in candidates if candidate.adapt()]
def make_presentation(images_pool: list[ImageInfo]) -> Document:
    """Return a presentation made of images, one slide per image.

    When *images_pool* is empty a message is printed and None is returned
    instead of a Document.
    """
    if len(images_pool) == 0:  # unable to find images
        print("No image found !")
        return None

    presentation = Document("presentation")
    # Presentation got a body in which content is stored
    slides_container = presentation.body
    slides_container.clear()

    # For each image, we create a page in the presentation and display the
    # image on it.
    for info in images_pool:
        file_name = info.path.name
        # add the file to the document
        uri = presentation.add_file(info.path)
        # Create an underlying page for the image
        page = DrawPage(f"Page {file_name}")
        # Create a frame for the image
        page.append(
            Frame.image_frame(
                image=uri,
                name=file_name,
                text="",  # Text over the image object
                size=(info.disp_w, info.disp_h),  # Display size of image
                anchor_type="page",
                page_number=None,
                position=(info.pos_x, info.pos_y),
                style=None,
            )
        )
        slides_container.append(page)
    return presentation
def main() -> None:
    """Collect the images, build the presentation, self-check, then save.

    When no presentation could be made a message is printed and the script
    exits with status 0.
    """
    images_pool = collect_images()
    presentation = make_presentation(images_pool)
    if presentation is None:
        print("Something went wrong.")
        # exit() is a helper injected by the site module for interactive
        # sessions; raising SystemExit does not depend on it. The exit
        # status is unchanged.
        raise SystemExit(0)
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Self-check of the recipe; does nothing unless ODFDO_TESTING is set."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    children = document.body.children
    page_count = sum(1 for item in children if isinstance(item, DrawPage))
    assert page_count == 3
if __name__ == "__main__":
main()
Make a presentation from text with different styles
Each line of the text becomes a slide of the presentation; the style changes depending on the length of the text line.
recipes/make_a_presentation_from_text_with_different_styles.py
#!/usr/bin/env python
"""Each line of the text becomes a slide of the presentation, we change of style
depending on the length of text line.
"""
import os
from pathlib import Path
from odfdo import Document, DrawPage, Frame, Style
_DOC_SEQUENCE = 287
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled_prez"
TARGET = "presentation.odp"
CONTENT = """123
azertyuiop
azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop azertyuiop
end.
""".splitlines()
# Ordered (threshold, font size) pairs, longest text first: if the text
# length is greater than the first value, the second value is used as the
# font size (in pt). The first matching pair wins; the final -1 threshold
# matches any text, including an empty line.
TEXT_LEN_FONT_SIZE = [
    (95, 10),
    (80, 11),
    (65, 14),
    (50, 16),
    (40, 20),
    (30, 24),
    (20, 32),
    (10, 40),
    (5, 44),
    (-1, 48),
]
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_base_style() -> Style:
    """Return the "Gloup48" graphic style used as a template for the
    text frames: no stroke, grey fill, centered bold red 48pt text."""
    template = Style(
        "graphic",
        name="Gloup48",
        parent="standard",
        stroke="none",
        fill_color="#b3b3b3",
        textarea_vertical_align="middle",
        padding_top="1cm",
        padding_bottom="1cm",
        padding_left="1cm",
        padding_right="1cm",
        line_distance="0cm",
        guide_overhang="0cm",
        guide_distance="0cm",
    )
    # paragraph level: horizontal centering
    template.set_properties(area="paragraph", align="center")
    # text level: font and color
    text_properties = {
        "color": "#dd0000",
        "text_outline": "false",
        "font": "Liberation Sans",
        "font_family": "Liberation Sans",  # compatibility
        "font_style_name": "Bold",
        "family_generic": "swiss",
        "size": "48pt",
        "weight": "bold",
    }
    template.set_properties(area="text", **text_properties)
    return template
def add_styles(document: Document) -> None:
    """Insert in the document one "Gloup<size>" style per font size listed
    in TEXT_LEN_FONT_SIZE, each one a clone of the base style."""
    template = create_base_style()
    font_sizes = [font_size for _, font_size in TEXT_LEN_FONT_SIZE]
    for font_size in font_sizes:
        variant: Style = template.clone
        # only the name and the text size differ from the template
        variant.set_attribute("style:name", f"Gloup{font_size}")
        variant.set_properties(area="text", size=f"{font_size}pt")
        document.insert_style(variant)
def generate_document() -> Document:
    """Generate a presentation with one slide per line of CONTENT.

    The style of each text frame is "Gloup<size>", where the size is given
    by the first entry of TEXT_LEN_FONT_SIZE whose threshold is exceeded by
    the length of the line.
    """
    presentation = Document("presentation")
    body = presentation.body
    body.clear()
    add_styles(presentation)
    # Used only if no threshold matches, so that the size never silently
    # leaks from the previous slide (or is undefined on the first one).
    fallback_size = TEXT_LEN_FONT_SIZE[-1][1]
    for number, text in enumerate(CONTENT, start=1):
        page = DrawPage(f"{number} - {text[:10]}")
        # choosing some style: first threshold exceeded by the text length
        size = next(
            (
                font_size
                for text_len, font_size in TEXT_LEN_FONT_SIZE
                if len(text) > text_len
            ),
            fallback_size,
        )
        style_name = f"Gloup{size}"
        text_frame = Frame.text_frame(
            text,
            size=("24cm", "2cm"),
            position=("2cm", "8cm"),
            style=style_name,
            text_style=style_name,
        )
        page.append(text_frame)
        body.append(page)
    return presentation
def main() -> None:
    """Generate the presentation, run the test hook, then save it."""
    presentation = generate_document()
    test_unit(presentation)
    save_new(presentation, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: one DrawPage per line of CONTENT, first and last
    pages carrying the first and last lines."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    children = document.body.children
    pages = [child for child in children if isinstance(child, DrawPage)]
    assert len(pages) == len(CONTENT)
    first_page = document.body.children[0]
    assert str(first_page).strip() == CONTENT[0].strip()
    last_page = document.body.children[-1]
    assert str(last_page).strip() == CONTENT[-1].strip()
if __name__ == "__main__":
main()
Extract and reorder slides
Create a new presentation from a previous one by extracting some slides, in a different order.
recipes/extract_and_reorder_slides.py
#!/usr/bin/env python
"""Create a new presentation from a previous one by extracting some slides,
in a different order.
"""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 290
DATA = Path(__file__).parent / "data"
SOURCE = "presentation_base.odp"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "presentation_extracted"
TARGET = "presentation.odp"
# Slide numbers to copy, in output order. Numbers are 1-based (converted to
# 0-based positions when extracting); a number may appear more than once.
SLIDES_ORDER = (3, 5, 2, 2)
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default DATA / SOURCE when no argument is provided."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def extract_slides(presentation_base: Document) -> Document:
    """Return a new presentation made of copies of the slides listed in
    SLIDES_ORDER, taken from the base document in that order.

    Slides that cannot be retrieved are skipped.
    """
    extracted = Document("presentation")
    # Important, copy styles too:
    extracted.delete_styles()
    extracted.merge_styles_from(presentation_base)
    extracted.body.clear()
    for number in SLIDES_ORDER:
        try:
            # SLIDES_ORDER is 1-based, positions are 0-based
            original = presentation_base.body.get_draw_page(position=number - 1)
        except Exception:  # noqa: S112
            continue
        if original is not None:
            # append a copy, the source document is left untouched
            extracted.body.append(original.clone)
    return extracted
def main() -> None:
    """Read the source presentation, extract the slides, run the test
    hook and save the new presentation."""
    source_presentation = read_source_document()
    extracted = extract_slides(source_presentation)
    test_unit(extracted)
    save_new(extracted, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: the extracted slides follow SLIDES_ORDER."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    slides = document.body.get_draw_pages()
    assert len(slides) == len(SLIDES_ORDER)
    # slide content are ~their page number
    for slide, expected in zip(slides, SLIDES_ORDER):
        assert (str(slide).strip()) == str(expected)
if __name__ == "__main__":
main()
Change values of a chart inside a document
Open a text document with an embedded chart and change some values.
recipes/change_values_of_a_chart_inside_a_document.py
#!/usr/bin/env python
"""Open a text document with an embedded chart and change some values."""
import os
import sys
from pathlib import Path
# for cell style
from odfdo import Document
_DOC_SEQUENCE = 295
DATA = Path(__file__).parent / "data"
SOURCE = "chart.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "modified_chart"
TARGET = "modified_chart.odt"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default DATA / SOURCE when no argument is provided."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def change(document: Document) -> None:
    """Modify three cells of the first table found in the content part of
    the embedded object "Object 1"."""
    # list the parts if needed
    print(document.parts)
    # -> ['mimetype', 'ObjectReplacements/Object 1', 'Object 1/meta.xml', 'Object 1/styles.xml', 'Object 1/content.xml', ...
    chart_content = document.get_part("Object 1/content.xml")
    table = chart_content.body.get_table(0)
    # if needed, get the values:
    print(table.get_values())
    # -> [
    #     [None, "", "Column 2", "Column 3"],
    #     ["Row 1", Decimal("NaN"), 10, 20],
    #     ["Row 2", Decimal("NaN"), 30, 40],
    #     ["Row 3", Decimal("NaN"), 50, 360],
    #     ["Row 4", Decimal("NaN"), Decimal("9.02"), Decimal("6.2")],
    # ]
    # change some values
    new_values = (
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coord, value in new_values:
        table.set_value(coord, value)
def main() -> None:
    """Read the source document, change the chart values, run the test
    hook and save the result."""
    chart_document = read_source_document()
    change(chart_document)
    test_unit(chart_document)
    save_new(chart_document, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: check one untouched cell and the three modified
    cells of the chart table."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    table = document.get_part("Object 1/content.xml").body.get_table(0)
    expected_cells = (
        ("A3", "Row 2"),
        ("A2", "label changed"),
        ("D3", 4000),
        ("D4", 4321),
    )
    for coord, expected in expected_cells:
        assert table.get_value(coord) == expected
if __name__ == "__main__":
main()
Add text span styles
Transform an unstyled document into a multi-styled document by changing the size and color of parts of words.
recipes/add_text_span_styles.py
#!/usr/bin/env python
"""Transform a not styled document into a multi styled document,
by changing size and color of each parts of words.
"""
import os
import sys
from itertools import chain
from pathlib import Path
from odfdo import Document, Style
_DOC_SEQUENCE = 300
DATA = Path(__file__).parent / "data"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled3"
SOURCE = "dormeur_notstyled.odt"
TARGET = "dormeur_styled.odt"
RANDOM_SEED = 1234
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default DATA / SOURCE when no argument is provided."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
class SimpleRandom:
    """Quick and dirty reproducible random generator for tests.

    The state is updated as ``current = 16807 * current % MODULUS``, so
    the same seed always gives the same sequence.
    """

    MODULUS = 2**31 - 1
    MAXI = 2**31 - 2

    def __init__(self) -> None:
        # same value as the default seed of set_seed()
        self.current = 16807

    def _next_number(self) -> None:
        """Advance the internal state by one step."""
        product = 16807 * self.current
        self.current = product % self.MODULUS

    def set_seed(self, seed: int = 16807) -> None:
        """Reset the internal state to the given seed."""
        self.current = seed

    def randint(self, max_value: int) -> int:
        """Advance the state and return it scaled by max_value / MAXI,
        plus one, truncated to an integer."""
        self._next_number()
        scaled = self.current * max_value / self.MAXI
        return int(scaled + 1)
def color_hex(r: int, g: int, b: int) -> str:
    """Convert red, green, blue values to a "#RRGGBB" string.

    Each channel is clamped to the 0..255 range, so that the result always
    has exactly six hexadecimal digits (a value of 256 would otherwise be
    rendered with three digits and give an invalid color).
    """

    def clamp(channel: int) -> int:
        return max(0, min(255, channel))

    return f"#{clamp(r):02X}{clamp(g):02X}{clamp(b):02X}"
def style_name_index(index: int) -> str:
    """Return the name of the style of given index: "rnd_<index>"."""
    return "rnd_{}".format(index)
def generate_random_styles(document: Document, rnd: SimpleRandom) -> None:
    """Generate 64 random text styles, named "rnd_1" to "rnd_64".

    The color of each style is drawn from rnd (three draws per style), the
    size grows with the index.
    """
    # range end is 65 so that "rnd_64" exists: style names are later drawn
    # with rnd.randint(64)
    for index in range(1, 65):
        # randint(256) starts at 1 and can reach 256: shift to the valid
        # 0..255 range of a color channel
        red = min(rnd.randint(256), 256) - 1
        green = min(rnd.randint(256), 256) - 1
        blue = min(rnd.randint(256), 256) - 1
        style = Style(
            "text",
            name=style_name_index(index),
            color=color_hex(red, green, blue),
            # NOTE(review): the size has no unit suffix - confirm that a
            # bare number is accepted for the font size
            size=f"{8 + index / 5}",
        )
        document.insert_style(style)
def add_styles(document: Document) -> None:
    """Apply a randomly chosen style to each distinct word of the text.

    Words are processed in sorted order, so that the result is the same
    from one run to another.
    """
    rnd = SimpleRandom()
    body = document.body
    generate_random_styles(document, rnd)
    distinct_words = set(str(body).split())
    for word in sorted(distinct_words):
        style_name = style_name_index(rnd.randint(64))
        # the word is used as a regex on every paragraph and header
        for element in chain(body.paragraphs, body.headers):
            element.set_span(style_name, regex=word)
def main() -> None:
    """Read the source document, add the span styles, run the test hook
    and save the result."""
    styled_document = read_source_document()
    add_styles(styled_document)
    test_unit(styled_document)
    save_new(styled_document, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: check the number of spans of the document body."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    spans = document.body.spans
    assert len(spans) == 157
if __name__ == "__main__":
main()
How to copy some style from another document
Minimal example of copy of a style from another document.
Document.get_style() main parameters: family: the family of the style (text styles apply to individual characters); display_name: the name of the style as shown in a desktop application. Styles have an internal name (“Yellow_20_Highlight” in this example), but here we use the display_name instead.
recipes/how_to_copy_some_style_from_another_document.py
#!/usr/bin/env python
"""Minimal example of copy of a style from another document.
Document.get_style() main parameters:
family : The family of the style, text styles apply on individual
characters.
display_name : The name of the style as we see it in a desktop
application. Styles have an internal name
(“Yellow_20_Highlight” in this example) but here we use
the display_name instead.
"""
import os
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 310
DATA = Path(__file__).parent / "data"
SOURCE = "lpod_styles.odt"
def generate_document() -> Document:
    """Return an empty text document in which the "Yellow Highlight" text
    style of the DATA / SOURCE document has been inserted."""
    document = Document("text")
    document.body.clear()

    # the style is searched by its display name in the other document
    styled_source = Document(DATA / SOURCE)
    highlight = styled_source.get_style(family="text", display_name="Yellow Highlight")
    document.insert_style(highlight, automatic=True)
    return document
def main() -> None:
    """Generate the document and run the test hook (nothing is saved)."""
    test_unit(generate_document())
def test_unit(document: Document) -> None:
    """Test-suite hook: the copied style is found by its display name."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    wanted = "Yellow Highlight"
    found = document.get_style(family="text", display_name=wanted)
    assert found.display_name == wanted
if __name__ == "__main__":
main()
Copy style from another document
Copy the styles from an existing document.
For a more advanced version, see the odfdo-style script.
recipes/copy_style_from_another_document.py
#!/usr/bin/env python
"""Copy the styles from an existing document.
For more advanced version, see the odfdo-style script.
"""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 320
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
# copied here from the odfdo package:
STYLE_SOURCE = DATA / "lpod_styles.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "styled1"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default DATA / SOURCE when no argument is provided."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def merge_styles(document: Document) -> None:
    """Replace all the styles of the document by the styles of the
    STYLE_SOURCE document."""
    # The styles we want are stored in the STYLE_SOURCE document,
    # so let's first open it:
    styles_provider = Document(STYLE_SOURCE)
    # We could change only some styles, but here we want a clean basis:
    document.delete_styles()
    # And now the actual style change:
    document.merge_styles_from(styles_provider)
def main() -> None:
    """Read the source document, replace its styles, run the test hook
    and save the result."""
    restyled = read_source_document()
    merge_styles(restyled)
    test_unit(restyled)
    save_new(restyled, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: check the number of styles after the merge."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    styles = document.get_styles()
    assert len(styles) == 75
if __name__ == "__main__":
main()
Create basic text styles
Create basic text styles with the Style class API.
Styles are applied to entire paragraphs or headings, or to words using Span.
The create_style_steel() and create_style_special() functions below are examples of styles that combine the area=”text” and area=”Graphic” or area=”paragraph” properties. The Style class API allows for basic styling, but for more complex situations, it is recommended to use a document as a template or copy the XML definition of an existing style. The recipe change_paragraph_styles_methods.py shows these different methods.
recipes/create_basic_text_styles.py
#!/usr/bin/env python
"""Create basic text styles with the Style class API.
Styles are applied to entire paragraphs or headings, or to words using Span.
The create_style_steel() and create_style_special() functions below are
examples of styles that combine the area="text" and area="Graphic" or
area="paragraph" properties. The Style class API allows for basic styling,
but for more complex situations, it is recommended to use a document as a
template or copy the XML definition of an existing style. The recipe
change_paragraph_styles_methods.py shows these different methods.
"""
import os
from pathlib import Path
from odfdo import Document, Header, Paragraph, Style
_DOC_SEQUENCE = 330
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_styles"
TARGET = "document.odt"
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_style_header_blue(document: Document) -> None:
    """Insert the "header_blue" paragraph style, child of "Heading".

    Bold blue font, size 160%, default outline level 1.
    """
    header_blue = Style(
        family="paragraph",
        name="header_blue",
        display_name="header_blue",
        parent_style="Heading",
        area="text",
        bold=True,
        color="blue",
        size="160%",
    )
    outline_level = "1"
    header_blue.set_attribute("style:default-outline-level", outline_level)
    document.insert_style(header_blue)
def create_style_header_navy(document: Document) -> None:
    """Insert the "header_navy" paragraph style, child of "Heading".

    Bold navy font, size 120%, default outline level 2.
    """
    header_navy = Style(
        family="paragraph",
        name="header_navy",
        display_name="header_navy",
        parent_style="Heading",
        area="text",
        bold=True,
        color="navy",
        size="120%",
    )
    outline_level = "2"
    header_navy.set_attribute("style:default-outline-level", outline_level)
    document.insert_style(header_navy)
def create_style_steel(document: Document) -> None:
    """Insert the "steel" paragraph style.

    Yellow font on dark blue background, with a solid dark blue fill
    declared in the graphic properties.
    """
    steel = Style(
        family="paragraph",
        area="text",
        name="steel",
        display_name="steel",
        color="yellow",
        background_color="darkblue",
    )
    # graphic area: given as raw ODF attributes
    graphic_fill = {
        "draw:fill": "solid",
        "draw:fill-color": "darkblue",
    }
    steel.set_properties(area="graphic", properties=graphic_fill)
    document.insert_style(steel)
def create_style_special(document: Document) -> None:
    """Insert the "special" paragraph style.

    Fixed pitch Courier New font on antique white background, 2cm left
    and right margins, 150% line height, centered text.
    """
    special = Style(
        family="paragraph",
        area="text",
        name="special",
        display_name="special",
        font="Courier New",
        font_family="Courier New",
        font_style_name="Regular",
        font_pitch="fixed",
        background_color="AntiqueWhite",
    )
    # paragraph area: given as raw ODF attributes
    paragraph_layout = {
        "fo:margin-left": "2cm",
        "fo:margin-right": "2cm",
        "fo:line-height": "150%",
        "fo:text-align": "center",
    }
    special.set_properties(area="paragraph", properties=paragraph_layout)
    document.insert_style(special)
def create_style_bold_gold(document: Document) -> None:
    """Insert the "bold_gold" text style.

    Bold font in dark goldenrod color.
    """
    options = {
        "family": "text",
        "name": "bold_gold",
        "display_name": "bold_gold",
        "bold": True,
        "color": "darkgoldenrod",
    }
    document.insert_style(Style(**options))
def create_style_italic_lime(document: Document) -> None:
    """Insert the "italic_lime" text style.

    Italic font, size 120%, lime color.
    """
    options = {
        "family": "text",
        "name": "italic_lime",
        "display_name": "italic_lime",
        "italic": True,
        "size": "120%",
        "color": "lime",
    }
    document.insert_style(Style(**options))
def add_styles(document: Document) -> None:
    """Insert the six styles of the recipe in the document."""
    style_makers = (
        create_style_header_blue,
        create_style_header_navy,
        create_style_steel,
        create_style_special,
        create_style_bold_gold,
        create_style_italic_lime,
    )
    for make_style in style_makers:
        make_style(document)
def add_content(document: Document) -> None:
    """Append to the body one main header and three sub headers, each sub
    header followed by a paragraph using the styles of the recipe."""
    body = document.body
    body.append(Header(1, "First level header", style="header_blue"))

    # first part: default paragraph style, spans on some words
    body.append(Header(2, "First sub header", style="header_navy"))
    first = Paragraph(
        "Lorem ipsum dolor sit amet, consectetuer "
        "adipiscing elit. Sed non risus. "
        "Suspendisse lectus tortor, dignissim sit amet, "
        "adipiscing nec, ultricies sed, dolor."
    )
    first.set_span("bold_gold", regex="dolor")
    first.set_span("italic_lime", regex=r"\w+ing")
    body.append(first)

    # second part: "steel" paragraph style and one span
    body.append(Header(2, "Second sub header", style="header_navy"))
    second = Paragraph(
        "Cras elementum ultrices diam. Maecenas ligula massa, "
        "varius a, semper congue, euismod non, mi. Proin porttitor, "
        "orci nec nonummy molestie, enim est eleifend mi, non "
        "fermentum diam nisl sit amet erat. Duis semper.",
        style="steel",
    )
    second.set_span("italic_lime", regex="semper")
    body.append(second)

    # third part: "special" paragraph style
    body.append(Header(2, "Third sub header", style="header_navy"))
    third = Paragraph(
        "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
        "enim. Pellentesque congue. Ut in risus volutpat libero "
        "pharetra tempor. Cras vestibulum bibendum augue. Praesent "
        "egestas leo in pede. Praesent blandit odio eu enim. "
        "Pellentesque sed dui ut augue blandit sodales.",
        style="special",
    )
    body.append(third)
def create_document() -> Document:
    """Return a new document whose body is emptied, then filled with the
    styles and the content of the recipe."""
    document = Document()
    document.body.clear()
    for fill in (add_styles, add_content):
        fill(document)
    return document
def main() -> None:
    """Create the styled document, run the test hook, then save it."""
    styled_document = create_document()
    test_unit(styled_document)
    save_new(styled_document, TARGET)
def test_unit(document: Document) -> None:
    """Test-suite hook: each style of the recipe is serialized with the
    expected fragments."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    # (family, style name, fragments expected in the serialized style)
    expectations = (
        (
            "paragraph",
            "header_blue",
            (
                'name="header_blue"',
                'color="#0000FF"',
                'font-weight="bold"',
                'font-size="160%"',
            ),
        ),
        (
            "paragraph",
            "header_navy",
            (
                'name="header_navy"',
                'color="#000080"',
                'font-weight="bold"',
                'font-size="120%"',
            ),
        ),
        (
            "paragraph",
            "steel",
            (
                'name="steel"',
                'color="#FFFF00"',
                "graphic-properties",
                'draw:fill-color="#00008B"',
            ),
        ),
        (
            "paragraph",
            "special",
            (
                'name="special"',
                'background-color="#FAEBD7"',
                "Courier",
                'line-height="150%"',
                'margin-left="2cm"',
                'margin-right="2cm"',
                'text-align="center"',
            ),
        ),
        (
            "text",
            "bold_gold",
            (
                'name="bold_gold"',
                'color="#B8860B"',
                'font-weight="bold"',
            ),
        ),
        (
            "text",
            "italic_lime",
            (
                'name="italic_lime"',
                'color="#00FF00"',
                'font-style="italic"',
                'font-size="120%"',
            ),
        ),
    )
    for family, style_name, fragments in expectations:
        serialized = document.get_style(family, style_name).serialize()
        for fragment in fragments:
            assert fragment in serialized
if __name__ == "__main__":
main()
How to apply a style to a paragraph
Minimal example of how to add a styled paragraph to a document.
recipes/how_to_apply_a_style_to_a_paragraph.py
#!/usr/bin/env python
"""Minimal example of how to add a styled paragraph to a document."""
import os
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 335
def generate_document() -> Document:
    """Return a text document containing a single paragraph whose style
    name is "highlight"."""
    document = Document("text")
    document.body.clear()
    # Assuming we have a style of name "highlight" :
    styled_paragraph = Paragraph("Highlighting the word", style="highlight")
    document.body.append(styled_paragraph)
    return document
def main() -> None:
    """Generate the document and run the test hook (nothing is saved)."""
    test_unit(generate_document())
def test_unit(document: Document) -> None:
    """Test-suite hook: the body has one paragraph, styled "highlight"."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    found = document.body.paragraphs
    assert len(found) == 1
    first = found[0]
    assert first.style == "highlight"
if __name__ == "__main__":
main()
Change paragraph styles methods
Many examples of how to change paragraph (and in-paragraph) styles, either by changing the paragraph style itself or by using Span to select parts of the paragraph. Includes several ways to create or import styles.
recipes/change_paragraph_styles_methods.py
#!/usr/bin/env python
"""Many examples of how to change paragraph (and in-paragraph) styles, either
by changing the paragraph style itself or by using Span to select parts
of the paragraph. Includes several ways to create or import styles.
"""
import os
from collections.abc import Iterator
from itertools import cycle
from pathlib import Path
from odfdo import Document, Element, Header, Paragraph, Style
_DOC_SEQUENCE = 340
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "change_styles"
DATA = Path(__file__).parent / "data"
LOREM = (DATA / "lorem.txt").read_text(encoding="utf8")
STYLED_SOURCE = "lpod_styles.odt"
TARGET_BEFORE = "document_before.odt"
TARGET_AFTER = "document_after.odt"
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def iter_lorem() -> Iterator[str]:
    """Return an endless iterator over the stripped pieces of LOREM split
    on ".", newlines being first replaced by spaces."""
    flattened = LOREM.replace("\n", " ")
    pieces = (piece.strip() for piece in flattened.split("."))
    return cycle(pieces)
def make_base_document() -> Document:
    """Return a document made of one level 1 header, then three level 2
    headers each followed by five paragraphs, all taken from the lorem
    ipsum content."""
    document = Document("odt")
    body = document.body
    body.clear()
    # Add some content with headers
    sentences = iter_lorem()
    body.append(Header(1, next(sentences)))
    for _section in range(3):
        body.append(Header(2, next(sentences)))
        for _para in range(5):
            # each paragraph is made of two consecutive sentences
            first = next(sentences)
            second = next(sentences)
            body.append(Paragraph(first + ". " + second + "."))
    return document
def add_some_styles(document: Document) -> None:
    """Insert six programmatically generated styles in the document, as
    automatic styles: two paragraph styles and four text styles."""
    # Always simpler to copy styles from an actual .odt existing file, but:
    definitions = (
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "bold-blue",
            "color": "blue",
            "bold": True,
        },
        {
            "family": "paragraph",
            "area": "text",
            "display_name": "italic-red",
            "color": "red",
            "bold": True,
            "italic": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "green",
            "background_color": "green",
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-yellow-blue",
            "color": "yellow",
            "background_color": "blue",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "bold-white-black",
            "color": "white",
            "background_color": "black",
            "bold": True,
        },
        {
            "family": "text",
            "area": "text",
            "display_name": "italic-red-yellow",
            "color": "red",
            "background_color": "yellow",
            "bold": True,
            "italic": True,
        },
    )
    for definition in definitions:
        document.insert_style(Style(**definition), automatic=True)
def add_style_from_xml(document: Document) -> None:
    """Insert in the document the "custom" paragraph style, built from
    its XML definition."""
    # Styles can be defined by their XML definition
    custom_xml = (
        '<style:style style:name="custom" '
        'style:display-name="custom" '
        'style:family="paragraph" '
        'style:parent-style-name="Text">'
        '<style:paragraph-properties fo:margin-left="2cm"/>'
        '<style:text-properties fo:color="#808080" loext:opacity="100%" '
        'fo:font-size="16pt" fo:font-style="normal" '
        'style:text-underline-style="solid" '
        'style:text-underline-width="auto" '
        'style:text-underline-color="font-color" '
        'fo:font-weight="bold"/>'
        "</style:style>"
    )
    custom_style = Element.from_tag(custom_xml)
    document.insert_style(custom_style)
def import_style_from_other_doc(document: Document) -> None:
    """Insert in the document, as an automatic style, the "Yellow
    Highlight" text style read from the DATA / STYLED_SOURCE document."""
    provider = Document(DATA / STYLED_SOURCE)
    imported = provider.get_style("text", display_name="Yellow Highlight")
    document.insert_style(imported, automatic=True)
def apply_styles(document: Document) -> None:
    """Apply the styles to the content of the document.

    Each local function below is one independent step; the steps are run
    in their order of definition.
    """

    def change_all_headers() -> None:
        green = document.get_style(family="text", display_name="green")
        # header styles should include some hints about the numbering
        # level, so here we just prefer to apply the style with a span
        for header in document.body.headers:
            header.set_span(green.name, offset=0)

    def change_all_paragraphs() -> None:
        bold_blue = document.get_style(family="paragraph", display_name="bold-blue")
        for para in document.body.paragraphs:
            para.style = bold_blue.name

    def change_some_paragraph() -> None:
        italic_red = document.get_style(family="paragraph", display_name="italic-red")
        for position in (3, 5, 7):
            document.body.get_paragraph(position).style = italic_red.name

    def apply_span_regex() -> None:
        yellow = document.get_style(family="text", display_name="bold-yellow-blue")
        white = document.get_style(family="text", display_name="bold-white-black")
        for para in document.body.paragraphs:
            para.set_span(yellow.name, regex=r"tortor|ipsum")
            para.set_span(white.name, regex=r"A\w+")

    def apply_span_offset() -> None:
        red = document.get_style(family="text", display_name="italic-red-yellow")
        target = document.body.get_paragraph(2)
        target.set_span(red.name, offset=9, length=22)

    def apply_custom_style() -> None:
        document.body.get_paragraph(13).style = "custom"

    def apply_imported_style() -> None:
        target = document.body.get_paragraph(14)
        highlight = document.get_style(family="text", display_name="Yellow Highlight")
        # feature: to not highlight spaces, make as many Spans as required:
        for start, end in target.search_all(r"\w+"):
            target.set_span(highlight.name, offset=start, length=end - start)

    steps = (
        change_all_headers,
        change_all_paragraphs,
        change_some_paragraph,
        apply_span_regex,
        apply_span_offset,
        apply_custom_style,
        apply_imported_style,
    )
    for step in steps:
        step()
def main() -> None:
    """Save the document before any style change, then add the styles,
    apply them, run the test hook and save the document again."""
    document = make_base_document()
    save_new(document, TARGET_BEFORE)
    # three ways to bring styles in the document, then their application
    for step in (
        add_some_styles,
        add_style_from_xml,
        import_style_from_other_doc,
        apply_styles,
    ):
        step(document)
    test_unit(document)
    save_new(document, TARGET_AFTER)
def test_unit(document: Document) -> None:
    """Test-suite hook: 15 paragraphs, every listed style is used by at
    least one element, the highlight style by 21 elements."""
    # only for test suite:
    if os.environ.get("ODFDO_TESTING") is None:
        return
    assert len(list(document.body.paragraphs)) == 15
    expected_usage = (
        ("paragraph", ("bold-blue", "italic-red", "custom")),
        (
            "text",
            ("green", "bold-yellow-blue", "bold-white-black", "Yellow Highlight"),
        ),
    )
    for family, display_names in expected_usage:
        for display_name in display_names:
            style = document.get_style(family=family, display_name=display_name)
            assert document.get_styled_elements(style.name)
    highlight = document.get_style(family="text", display_name="Yellow Highlight")
    assert len(document.get_styled_elements(highlight.name)) == 21
if __name__ == "__main__":
main()
Delete parts of a text document
Deleting content from one point to another in a .odt document.
(Idea from an answer to problem #49).
recipes/delete_parts_of_a_text_document.py
#!/usr/bin/env python
"""Deleting content from one point to another in a .odt document.
(Idea from an answer to problem #49).
"""
import os
from pathlib import Path
from odfdo import Document, Element, Header, Paragraph
_DOC_SEQUENCE = 400
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "delete_content"
TARGET_INITIAL = "document_initial.odt"
TARGET_FINAL = "document_final.odt"
class KeepingState:
    """Tiny holder of the current step of the process while the content
    is parsed.

    The step can be "before", "deleting" or "after".
    """

    def __init__(self, initial_state: str = "before") -> None:
        # current step, updated from outside while parsing
        self.step: str = initial_state
def save_new(document: Document, name: str) -> None:
    """Write the recipe result under OUTPUT_DIR, creating the directory
    when it does not exist yet."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def create_base_document() -> Document:
    """Return a document containing content requiring deletion.

    The document is made of a title and four parts ("part A" to "part D"),
    each part being a level 2 header followed by paragraphs.
    """
    document = Document("text")
    body = document.body
    body.clear()
    content = (
        Header(1, "Some title"),
        Header(2, "part A"),
        Paragraph(
            "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed non risus."
        ),
        Paragraph(
            "Suspendisse lectus tortor, dignissim sit amet, adipiscing "
            "nec, ultricies sed, dolor. Cras elementum ultrices diam. "
            "Maecenas ligula massa, varius a, semper congue, euismod non, mi."
        ),
        Header(2, "part B"),
        Paragraph(
            "Proin porttitor, orci nec nonummy molestie, enim est eleifend "
            "mi, non fermentum diam nisl sit amet erat. Duis semper. "
            "Duis arcu massa, scelerisque vitae, consequat in, pretium a, "
            "enim. Pellentesque congue. Ut in risus volutpat libero pharetra tempor."
        ),
        Paragraph(
            "Cras vestibulum bibendum augue. Praesent egestas leo in pede. "
            "Praesent blandit odio eu enim. Pellentesque sed dui ut augue "
            "blandit sodales. Vestibulum ante ipsum primis in faucibus orci "
            "luctus et ultrices posuere cubilia Curae; Aliquam nibh."
        ),
        Header(2, "part C"),
        Paragraph(
            "Mauris ac mauris sed pede pellentesque fermentum. "
            "Maecenas adipiscing ante non diam sodales hendrerit. Ut "
            "velit mauris, egestas sed, gravida nec, ornare ut, mi."
        ),
        Paragraph(
            "Aenean ut orci vel massa suscipit pulvinar. Nulla sollicitudin. "
            "Fusce varius, ligula non tempus aliquam, nunc turpis "
            "ullamcorper nibh, in tempus sapien eros vitae ligula. "
            "Pellentesque rhoncus nunc et augue. Integer id felis. Curabitur "
            "aliquet pellentesque diam. Integer quis metus vitae elit "
            "lobortis egestas."
        ),
        Header(2, "part D"),
        Paragraph(
            "Morbi vel erat non mauris convallis vehicula. Nulla et sapien. "
            "Integer tortor tellus, aliquam faucibus, convallis id, congue "
            # trailing space required: the string literals are concatenated
            "eu, quam. Mauris ullamcorper felis vitae erat. "
            "Proin feugiat, augue non elementum posuere, metus purus "
            "iaculis lectus, et tristique ligula justo vitae magna. Aliquam "
            "convallis sollicitudin purus."
        ),
        Paragraph(
            "Praesent aliquam, enim at fermentum mollis, ligula massa "
            "adipiscing nisl, ac euismod nibh nisl eu lectus. Fusce "
            "vulputate sem at sapien. Vivamus leo. Aliquam euismod "
            "libero eu enim. Nulla nec felis sed leo placerat imperdiet."
        ),
        Paragraph(
            "Aenean suscipit nulla in justo. Suspendisse cursus rutrum augue. "
            "Nulla tincidunt tincidunt mi. Curabitur iaculis, lorem vel "
            "rhoncus faucibus, felis magna fermentum augue, et ultricies "
            "lacus lorem varius purus. Curabitur eu amet."
        ),
    )
    for element in content:
        body.append(element)
    return document
def keep_element(
    state: KeepingState,
    start_marker: str,
    end_marker: str,
    elem: Element,
) -> bool:
    """Tell whether `elem` must be kept (True) or deleted (False).

    `state.step` goes from "before" to "deleting" when a Header whose text
    contains `start_marker` is met, and from "deleting" to "after" when a
    Paragraph whose text starts with `end_marker` is met. Both transitions
    are evaluated, in that order, for the current element.
    """
    # Everything is kept until the heading holding the start marker.
    starts_deletion = (
        state.step == "before"
        and isinstance(elem, Header)
        and start_marker in str(elem)
    )
    if starts_deletion:
        state.step = "deleting"

    # Everything is deleted until the paragraph starting with the end marker.
    ends_deletion = (
        state.step == "deleting"
        and isinstance(elem, Paragraph)
        and str(elem).startswith(end_marker)
    )
    if ends_deletion:
        state.step = "after"

    return not (state.step == "deleting")
def delete_content(document: Document, start_marker: str, end_marker: str) -> None:
    """Remove the body elements located between the two markers.

    The element holding `start_marker` is removed, the element holding
    `end_marker` is preserved. The body is rebuilt from the kept elements.
    """
    state = KeepingState()
    keep_list: list[Element] = [
        child
        for child in document.body.children
        if keep_element(state, start_marker, end_marker, child)
    ]
    document.body.clear()
    document.body.extend(keep_list)
def main() -> None:
    """Build the base document, save it, delete a part of it and save again."""
    doc = create_base_document()
    save_new(doc, TARGET_INITIAL)
    # Content is deleted from the "part B" heading up to the paragraph
    # starting with "Aenean": all of part B and the first half of part C
    # disappear, so the end of part C directly follows part A.
    delete_content(doc, "part B", "Aenean")
    save_new(doc, TARGET_FINAL)
    test_unit(doc)
def test_unit(document: Document) -> None:
    """Check the resulting document (only when run by the test suite)."""
    if os.environ.get("ODFDO_TESTING") is None:
        return
    first_text = str(document.body.get_paragraph(position=0))
    assert first_text.startswith("Lorem")
    fourth_text = str(document.body.get_paragraph(position=3))
    assert fourth_text.startswith("Morbi")
if __name__ == "__main__":
main()
Create color chart in spreadsheet
Create some color chart in a spreadsheet using cells styles functions.
For cells, the following functions are used: make_table_cell_border_string(), create_table_cell_style() and rgb2hex().
Apply a row style to define the row height.
Apply a column style to define the column width.
recipes/create_color_chart_in_spreadsheet.py
#!/usr/bin/env python
"""Create some color chart in a spreadsheet using cells styles functions.
For cells, use of functions:
make_table_cell_border_string()
create_table_cell_style()
rgb2hex()
Apply a row style to define the row height.
Apply a column style to define the column width.
"""
import os
from pathlib import Path
from odfdo import (
Cell,
Document,
Row,
Style,
Table,
create_table_cell_style,
make_table_cell_border_string,
rgb2hex,
)
_DOC_SEQUENCE = 420
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "chart"
TARGET = "color_chart.ods"
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_chart() -> Document:
    """Generate a spreadsheet with colored cells.

    The "chart" table has 32 rows (y from 0 to 248, step 8) of 8 cells
    (x from 0 to 224, step 32). Each cell shows the hexadecimal code of the
    (x, y, (x + y) % 256) color and uses that color as background.

    For cells, use of functions:
        make_table_cell_border_string()
        create_table_cell_style()
        rgb2hex()
    Apply a row style to define the row height.
    Apply a column style to define the column width.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    table = Table("chart")

    # The border descriptions are the same for every cell: build them once
    # instead of once per cell.
    border_right_left = make_table_cell_border_string(
        thick="0.20cm",
        color="white",
    )
    border_top_bottom = make_table_cell_border_string(
        thick="0.80cm",
        color="white",
    )

    for y in range(0, 256, 8):
        row = Row()
        for x in range(0, 256, 32):
            cell_value = (x, y, (x + y) % 256)
            style = create_table_cell_style(
                color="grey",
                background_color=cell_value,
                border_right=border_right_left,
                border_left=border_right_left,
                border_bottom=border_top_bottom,
                border_top=border_top_bottom,
            )
            # insert_style() returns the name to use to reference the style.
            name = document.insert_style(style=style, automatic=True)
            cell = Cell(value=rgb2hex(cell_value), style=name)
            row.append_cell(cell)
        table.append_row(row)

    row_style = Style("table-row", height="1.80cm")
    name_style_row = document.insert_style(style=row_style, automatic=True)
    for row in table.rows:
        row.style = name_style_row
        table.set_row(row.y, row)

    col_style = Style("table-column", width="3.6cm")
    name_style_column = document.insert_style(style=col_style, automatic=True)
    for column in table.columns:
        # Reference the style by its inserted name, as done for rows and cells.
        column.style = name_style_column
        table.set_column(column.x, column)

    body.append(table)
    return document
def main() -> None:
    """Generate the color chart, check it and save it."""
    chart = generate_chart()
    test_unit(chart)
    save_new(chart, TARGET)
def test_unit(document: Document) -> None:
    """Check the four corner cells of the chart (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    table = document.body.get_table(name="chart")
    assert isinstance(table, Table)
    # (cell coordinate, expected hexadecimal color)
    corners = (
        ("A1", "#000000"),
        ("H1", "#E000E0"),
        ("A32", "#00F8F8"),
        ("H32", "#E0F8D8"),
    )
    for coord, expected_color in corners:
        cell = table.get_cell(coord)
        assert cell.value == expected_color
        style = document.get_style("table-cell", cell.style)
        # The cell background must be the color written in the cell.
        assert style.get_properties()["fo:background-color"] == cell.value
if __name__ == "__main__":
main()
Get cell background color
Read the background color of a table cell.
recipes/get_cell_background_color.py
#!/usr/bin/env python
"""Read the background color of a table cell."""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 440
DATA = Path(__file__).parent / "data"
SOURCE = "cell_color.ods"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default sample from the data directory when no argument is given."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)
def read_color(document: Document) -> list[tuple[str, str]]:
    """Read some cell background colors from the table 0 (first sheet).

    Returns the list of (label, color) tuples, which is also printed.
    """
    # (label, arguments following the table index)
    queries = (
        ("Color for B2", ("b2",)),
        ("Color for B3", ("b3",)),
        ("Color for C3", ("c3",)),
        ('Color for D3 (default is "#ffffff")', ("d3",)),
        ("Color for e3 (providing another default)", ("e3", "#123456")),
        ("Color for far away cell", ((1000, 10000),)),
    )
    result = []
    for label, arguments in queries:
        color = document.get_cell_background_color(0, *arguments)
        result.append((label, color))
    lines = [": ".join(entry) for entry in result]
    print("\n".join(lines))
    return result
def main() -> None:
    """Read the sample spreadsheet and report some cell background colors."""
    source_document = read_source_document()
    colors = read_color(source_document)
    test_unit(colors)
def test_unit(colors: list[tuple[str, str]]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert colors[0][1] == "#2a6099"
assert colors[1][1] == "#ff4000"
assert colors[2][1] == "#ffff00"
assert colors[3][1] == "#ffffff"
assert colors[4][1] == "#123456"
assert colors[5][1] == "#ffffff"
if __name__ == "__main__":
main()
Extract a sub table from some big table
Open a table of 1000 lines and 100 columns, extract a sub table of 100 lines 26 columns, save the result in a spreadsheet document.
recipes/extract_a_sub_table_from_some_big_table.py
#!/usr/bin/env python
"""Open a table of 1000 lines and 100 columns, extract a sub table
of 100 lines 26 columns, save the result in a spreadsheet document.
"""
import os
import sys
from pathlib import Path
from odfdo import Document, Row, Table
_DOC_SEQUENCE = 450
DATA = Path(__file__).parent / "data"
SOURCE = "big_table.ods"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "extract_table"
TARGET = "document.ods"
def read_source_document() -> Document:
    """Open the document named on the command line, else the default sample."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def extract_sub_tables(document: Document) -> Document:
    """Return a spreadsheet with 2 sheets extracted from the input document.

    Both sheets hold the same area of the "Big Table" sheet (columns 50 to
    75, rows 800 to 899), built with two different methods.

    Expected output:
        Size of Big Table : (100, 1000)
        Size of extracted table 1 : (26, 100)
        Size of extracted table 2 : (26, 100)
    """
    source_table = document.body.get_table(name="Big Table")
    print("Size of Big Table :", source_table.size)

    extracted = Document("ods")
    extracted.body.clear()

    # First method: copy the values of 100 rows of 26 columns, row by row.
    by_rows = Table("Extract 1")
    for row_index in range(800, 900):
        source_row = source_table.get_row(row_index)
        copied_row = Row()
        copied_row.set_values(
            [source_row.get_value(column) for column in range(50, 76)]
        )
        by_rows.append(copied_row)
    extracted.body.append(by_rows)
    print("Size of extracted table 1 :", by_rows.size)

    # Second method: get the cells of the area and set them in one call.
    by_area = Table("Extract 2")
    area_cells = source_table.get_cells(coord=(50, 800, 75, 899))
    by_area.set_cells(coord=(0, 0), cells=area_cells)
    extracted.body.append(by_area)
    print("Size of extracted table 2 :", by_area.size)

    return extracted
def main() -> None:
    """Extract the sub tables from the sample, check and save the result."""
    big_document = read_source_document()
    result_document = extract_sub_tables(big_document)
    test_unit(result_document)
    save_new(result_document, TARGET)
def test_unit(spreadsheet: Document) -> None:
    """Check the size of both extracted tables (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    body = spreadsheet.body
    for position in (0, 1):
        extracted_table = body.get_table(position=position)
        assert extracted_table.size == (26, 100)
if __name__ == "__main__":
main()
Make a basic spreadsheet
Create a spreadsheet with one table and a few data, strip the table and compute the table size.
recipes/make_a_basic_spreadsheet.py
#!/usr/bin/env python
"""Create a spreadsheet with one table and a few data, strip the table
and compute the table size.
"""
import os
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 460
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "basic_ods"
TARGET = "spreadsheet.ods"
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document():
    """Return a spreadsheet document holding one table with a few values.

    The table is created with an initial size, gets two more rows, is filled
    with text and numeric values, then is right-stripped. Information about
    the table is printed along the way.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()

    # Each sheet of a spreadsheet is a table. Giving the initial width
    # (columns) and height (rows) is optional.
    table = Table("First Table", width=20, height=3)
    body.append(table)

    # A table contains rows, more of them can be appended.
    table.append_row()
    table.append_row()
    print("rows in the table (3 at creation + 2 appended):", len(table.rows))

    # A row contains cells.
    for current_row in table.rows:
        print("row, nb of cells: ", current_row.y, len(current_row.cells))
    print("nb of cells of the last row:", len(table.get_row(-1).cells))

    # Cells can hold different kinds of values: text in the first three
    # rows, numbers in the last two.
    for row_nb in range(5):
        for col_nb in range(10):
            if row_nb < 3:
                value = f"cell {col_nb} {row_nb}"
            else:
                value = col_nb * 100 + row_nb
            table.set_value((col_nb, row_nb), value)

    # Before saving the document, the unused columns can be stripped.
    print("table size before strip:", table.size)
    table.rstrip()
    print("table size after strip:", table.size)
    print("nb of cells of the last row:", len(table.get_row(-1).cells))
    print("Content of the table (CSV):")
    print(table.to_csv())

    return document
def main() -> None:
    """Generate the basic spreadsheet, check it and save it."""
    spreadsheet = generate_document()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
def test_unit(document: Document) -> None:
    """Check the table size and its corner cells (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    table = document.body.get_table(position=0)
    assert table.size == (10, 5)
    # (cell coordinate, expected value)
    corners = (
        ("A1", "cell 0 0"),
        ("A5", 4),
        ("J1", "cell 9 0"),
        ("J5", 904),
    )
    for coord, expected in corners:
        assert table.get_cell(coord).value == expected
if __name__ == "__main__":
main()
Make spreadsheet with named ranges
Create a spreadsheet with two tables, using named ranges to fill cells.
recipes/make_spreadsheet_with_named_ranges.py
#!/usr/bin/env python
"""Create a spreadsheet with two tables, using named ranges to fill cells."""
import os
from pathlib import Path
from odfdo import Document, Table
_DOC_SEQUENCE = 470
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "named_range"
TARGET = "spreadsheet.ods"
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def make_some_table_content() -> Table:
    """Return a table with the squares of 1 to 10 in column B and a
    "Total:" label in cell A11."""
    table = Table("First Table")
    for row_index, number in enumerate(range(1, 11)):
        table.set_value((1, row_index), number**2)
    table.set_value("A11", "Total:")
    return table
def generate_document() -> Document:
    """Return a spreadsheet with two tables using named ranges.

    Two named ranges are defined on the first table ("squares_values" and
    "total"); the second table is filled with the description and the
    content of these ranges. The CSV content of both tables is printed.
    """
    document = Document("spreadsheet")
    body = document.body
    body.clear()
    first_table = make_some_table_content()
    body.append(first_table)

    # Define a named range for the 10 values:
    first_table_name = first_table.name
    first_table.set_named_range("squares_values", "B1:B10", first_table_name)
    # A single cell range can use the notation "B11" or (1, 10):
    first_table.set_named_range("total", (1, 10), first_table_name)

    # Read the values of a named range, then store their sum in another one:
    values = first_table.get_named_range("squares_values").get_values(flat=True)
    first_table.get_named_range("total").set_value(sum(values))

    # Use the named ranges from a second table:
    second_table = Table("Second Table")
    body.append(second_table)

    total_range = second_table.get_named_range("total")
    total_description = (
        ("A1", "name:"),
        ("B1", total_range.name),
        ("A2", "range:"),
        ("B2", str(total_range.crange)),
        ("A3", "from table:"),
        ("B3", total_range.table_name),
        ("A4", "content:"),
        ("B4", total_range.get_value()),
    )
    for coord, value in total_description:
        second_table.set_value(coord, value)

    squares_range = second_table.get_named_range("squares_values")
    squares_description = (
        ("D1", "name:"),
        ("E1", squares_range.name),
        ("D2", "range:"),
        ("E2", str(squares_range.crange)),
        ("D3", "from table:"),
        ("E3", squares_range.table_name),
        ("D4", "content:"),
    )
    for coord, value in squares_description:
        second_table.set_value(coord, value)
    # The "E4:4" notation is a little hack for the area starting at E4
    # on row 4.
    second_table.set_values(
        values=[squares_range.get_values(flat=True)],
        coord="E4:4",
    )

    print("Content of the table 1:")
    print(first_table.name)
    print(first_table.to_csv())
    print("Content of the table 2:")
    print(second_table.name)
    print(second_table.to_csv())

    return document
def main() -> None:
    """Generate the spreadsheet using named ranges, check it and save it."""
    spreadsheet = generate_document()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
def test_unit(document: Document) -> None:
    """Check some cells of both tables (test suite only)."""
    if "ODFDO_TESTING" not in os.environ:
        return
    body = document.body
    first_table = body.get_table(0)
    for coord, expected in (("B1", 1), ("B10", 100), ("B11", 385)):
        assert first_table.get_cell(coord).value == expected
    second_table = body.get_table(1)
    for coord, expected in (("B4", 385), ("N4", 100)):
        assert second_table.get_cell(coord).value == expected
if __name__ == "__main__":
main()
Introspecting elements
Demo of quick introspecting of a document’s elements.
The body object of a document is a mapping of an XML tree from which we can access other elements we are looking for (parent, children).
recipes/introspecting_elements.py
#!/usr/bin/env python
"""Demo of quick introspecting of a document's elements.
The body object of a document is a mapping of an XML tree from which we
can access other elements we are looking for (parent, children)."""
import os
import sys
from pathlib import Path
from pprint import pformat
from typing import Any
from odfdo import Document
_DOC_SEQUENCE = 480
DATA = Path(__file__).parent / "data"
# ODF export of Wikipedia article Hitchhiker's Guide to the Galaxy (CC-By-SA) :
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the default sample."""
    has_argument = len(sys.argv) > 1
    source = sys.argv[1] if has_argument else DATA / SOURCE
    return Document(source)
def analyser(document: Document) -> dict[str, Any]:
    """Collect and print information about one paragraph of the document.

    The inspected paragraph is the one at position 42 in the body. The
    returned dict holds its tag, attributes, text, parent, children and
    pretty serialization.
    """
    # Elements are part of an XML tree:
    paragraph = document.body.get_paragraph(position=42)
    result: dict[str, Any] = {
        "tag": paragraph.tag,
        "attributes": paragraph.attributes,
        "str": str(paragraph),
        "parent": paragraph.parent,
        "children": paragraph.children,
        "serialize": paragraph.serialize(pretty=True),
    }
    print("Informations about the paragraph:")
    print(pformat(result))
    return result
def main() -> None:
    """Read the sample document and introspect one of its paragraphs."""
    source_document = read_source_document()
    information = analyser(source_document)
    test_unit(information)
def test_unit(result: dict[str, Any]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert result["tag"] == "text:p"
assert repr(result["parent"]) == "<Element tag=text:note-body>"
assert repr(result["children"]) == "[<Span tag=text:span>]"
if __name__ == "__main__":
main()
Show meta data
Print the metadata information of an ODF file.
Metadata are accessible through the meta part: meta = document.get_part(“meta.xml”) or the shortcut: document.meta.
You then get access to various getters and setters. The getters return Python types and the respective setters take the same Python type as a parameter.
recipes/show_meta_data.py
#!/usr/bin/env python
"""Print the metadata informations of a ODF file.
Metadata are accessible through the meta part: meta = document.get_part("meta.xml")
or the shortcut: document.meta.
You then get access to various getters and setters. The getters return
Python types and the respective setters take the same Python type as
a parameter.
"""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 490
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default sample from the data directory when no argument is given."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)
def print_meta(document: Document) -> None:
    """Print the metadata of the document.

    Output of the getters of the meta part.
    Notice that odfdo doesn't increment editing cycles nor statistics
    when saving the document.
    For the metadata using dates or durations, odfdo provides datatypes that
    decode from and serialize back to strings.
    Strings are always decoded as utf-8, numeric values are decoded
    as Decimal.
    """
    meta = document.meta
    print(f"Meta data of {document.path}")
    print("Title :", meta.title)
    print("creator :", meta.creator)
    print("creation date :", meta.creation_date)
    print("modification date :", meta.date)
    print("initial creator :", meta.initial_creator)
    print("subject :", meta.subject)
    print("description :", meta.description)
    print("editing cycles :", meta.editing_cycles)
    print("editing duration :", meta.editing_duration)
    print("generator :", meta.generator)
    print("language :", meta.language)
    print("keywords :", meta.keyword)
    print("statistics ")
    if meta.statistic is not None:
        for key, value in meta.statistic.items():
            # Statistic keys look like "meta:page-count": hide the prefix.
            label = key.removeprefix("meta:")
            print(f" {label:<18}: {value}")
    user_defined = meta.user_defined_metadata
    if user_defined:
        print("user defined metadata")
        for key, value in user_defined.items():
            # Only drop a real "meta:" prefix: blindly slicing 5 characters
            # would truncate a user-defined name that has no such prefix.
            label = key.removeprefix("meta:")
            print(f" {label:<18}: {value}")
    # A quick way to get all of this information:
    print("-" * 70)
    print(document.get_formated_meta())
def main() -> None:
    """Read the sample document and print its metadata."""
    source_document = read_source_document()
    print_meta(source_document)
    test_unit(source_document)
def test_unit(document: Document) -> None:
    """Check two metadata values of the sample file (test suite only)."""
    if os.environ.get("ODFDO_TESTING") is None:
        return
    meta = document.meta
    keyword = meta.keyword.strip()
    assert keyword == "Douglas Adams"
    page_count = meta.statistic["meta:page-count"]
    assert page_count == 20
if __name__ == "__main__":
main()
Move link to footnote
Remove all links from a document, transforming each link's information (URL, text) into a footnote. Links already inside notes are simply removed, keeping only the plain-text URL. (Side note: most office suites dislike notes in notes.)
recipes/move_link_to_footnote.py
#!/usr/bin/env python
"""Remove all links from a document, transforming each link information (URL,
text) into a footnote. Of course, removing links already inside notes, just
keeping plain text URL. (Side note: most office suite dislike notes in notes)
"""
import os
import sys
from pathlib import Path
from odfdo import Document, Link, remove_tree
_DOC_SEQUENCE = 500
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "footnote1"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the default sample."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def convert_links(document: Document) -> list[tuple[str, int]]:
    """Replace the links of the document by footnotes and return statistics.

    Links found inside existing notes are not turned into nested notes:
    their URL is written as plain text right after the link, then the link
    elements are removed from the note. For every link found in a
    paragraph, a footnote holding the link text and URL is inserted after
    the link, then the link elements are removed from the paragraph.

    Returns the list of (label, count) tuples, which is also printed.
    """
    body = document.body
    result: list[tuple[str, int]] = []
    result.append(("source, links occurrences:", len(body.get_links())))
    result.append(("source, footnotes occurences:", len(body.get_notes())))

    counter_links_in_notes = 0
    for note in body.get_notes():
        for link in note.get_links():
            counter_links_in_notes += 1
            url = link.get_attribute("xlink:href")
            # A link followed by no text has no tail (None): use an empty
            # string so the literal text "None" never ends up in the note.
            tail = link.tail or ""
            link.tail = f" (link: {url}) {tail}"
        remove_tree(note, Link)
    result.append(("source, links inside notes:", counter_links_in_notes))

    counter_added_note = 0  # added notes counter
    for paragraph in body.paragraphs:
        for link in paragraph.get_links():
            url = link.get_attribute("xlink:href")
            text = link.inner_text
            counter_added_note += 1
            paragraph.insert_note(
                after=link,  # citation is inserted after current link
                note_id=f"my_note_{counter_added_note}",
                citation="1",  # The symbol the user sees to follow the footnote.
                # The footnote itself, at the end of the page:
                body=(f". {text}, link: {url}"),
            )
        remove_tree(paragraph, Link)

    result.append(("final, links occurrences:", len(body.get_links())))
    result.append(("final, added footnotes:", counter_added_note))
    result.append(("final, footnotes occurences:", len(body.get_notes())))
    for line in result:
        print(line[0], line[1])
    return result
def main() -> None:
    """Convert the links of the sample document, check and save the result."""
    source_document = read_source_document()
    statistics = convert_links(source_document)
    test_unit(statistics)
    save_new(source_document, TARGET)
def test_unit(result: list[tuple[str, int]]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert result[0][1] == 352
assert result[1][1] == 49
assert result[2][1] == 38
assert result[3][1] == 0
assert result[4][1] == 314
assert result[5][1] == 363
if __name__ == "__main__":
main()
Remove http links
Remove all the links (the text:a tag), keeping the inner text.
recipes/remove_http_links.py
#!/usr/bin/env python
"""Remove all the links (the text:a tag), keeping the inner text."""
import os
import sys
from pathlib import Path
from odfdo import Document, Link, remove_tree
_DOC_SEQUENCE = 510
DATA = Path(__file__).parent / "data"
SOURCE = "collection2.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nolink"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default sample from the data directory when no argument is given."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def remove_all_links(document: Document) -> list[tuple[str, int]]:
    """Remove all links from the body and return the link counts measured
    before and after the removal (the counts are also printed)."""
    body = document.body
    links_before = len(body.get_links())
    remove_tree(body, Link)
    links_after = len(body.get_links())
    result: list[tuple[str, int]] = [
        ("source, links occurrences:", links_before),
        ("final, links occurrences:", links_after),
    ]
    for label, count in result:
        print(label, count)
    return result
def main() -> None:
    """Remove the links of the sample document, check and save the result."""
    source_document = read_source_document()
    statistics = remove_all_links(source_document)
    test_unit(statistics)
    save_new(source_document, TARGET)
def test_unit(result: list[tuple[str, int]]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert result[0][1] == 352
assert result[1][1] == 0
if __name__ == "__main__":
main()
Remove span styles
Remove span styles (like some words in bold in a paragraph), except in titles.
recipes/remove_span_styles.py
#!/usr/bin/env python
"""Remove span styles (like some words in bold in a paragraph),
except in titles.
"""
import os
import sys
from pathlib import Path
from odfdo import Document, Header, Span, remove_tree
_DOC_SEQUENCE = 520
DATA = Path(__file__).parent / "data"
SOURCE = "dormeur.odt"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "nostyle"
TARGET = "document.odt"
def read_source_document() -> Document:
    """Open the document named on the command line, else the default sample."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Write `document` under `name` in OUTPUT_DIR (created when missing)."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def remove_all_text_span(document: Document) -> None:
    """Remove the span styles of the document, except in titles, printing
    the number of spans found before and after the removal."""
    body = document.body
    spans_before = len(body.spans)
    print("source, 'text:span' occurrences:", spans_before)
    # Header is given as the third argument so that titles are left alone.
    remove_tree(document.body, Span, Header)
    spans_after = len(body.spans)
    print("final, 'text:span' occurrences after removal:", spans_after)
def main() -> None:
    """Remove the span styles of the sample document, check and save it."""
    source_document = read_source_document()
    remove_all_text_span(source_document)
    test_unit(source_document)
    save_new(source_document, TARGET)
def test_unit(document: Document) -> None:
    """Check that a single span remains in the body (test suite only)."""
    if os.environ.get("ODFDO_TESTING") is None:
        return
    remaining_spans = len(document.body.spans)
    assert remaining_spans == 1
if __name__ == "__main__":
main()
Retrieve all pictures from odf files
Scan a list of files and directories (recursion), open all ODF documents and copy document images to a target directory.
recipes/retrieve_all_pictures_from_ODF_files.py
#!/usr/bin/env python
"""Scan a list of files and directories (recursion), open all ODF documents
and copy document images to a target directory.
"""
import os
import sys
import time
from hashlib import sha256
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 530
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "collected_pics"
DATA = Path(__file__).parent / "data"
# encoding = "UTF8"
known_images = set()
counter_image = 0
counter_odf = 0
counter_outside = 0
def store_image(path: Path, name: str, content: bytes) -> None:
    """Save an image found in the ODF file `path` into OUTPUT_DIR.

    The new file name is "<odf file name>_<number>_<image name>", where the
    dots of the ODF file name are replaced by underscores and the number is
    the first one, starting from 1, giving a file name not already used.
    The global counter of stored images is incremented.
    """
    global counter_image
    prefix = path.name.replace(".", "_")
    if not OUTPUT_DIR.is_dir():
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    number = 1
    while (target := OUTPUT_DIR / f"{prefix}_{number}_{name}").exists():
        number += 1
    target.write_bytes(content)
    counter_image += 1
def parse_odf_pics(path: Path) -> None:
    """Store the images embedded in the file `path`, if it is an ODF file.

    Using odfdo for:
    - opening the possible ODF document: Document
    - finding the images of the document: body.images and their url
    - reading the image content: get_part

    Files whose suffix does not start with ".od" and files that cannot be
    opened as a Document are silently ignored. Images whose URL is not a
    part of the document are reported and counted as "outside".
    """
    global counter_odf, counter_outside

    if not path.suffix.lower().startswith(".od"):
        return
    try:
        document = Document(path)
    except Exception:
        # Best effort scan: whatever the reason, skip unreadable files.
        return
    counter_odf += 1

    for image in document.body.images:
        image_url = image.url
        if image_url:
            try:
                image_content = document.get_part(image_url)
            except KeyError:
                print("- not found inside document:", path)
                print(" image URL:", image_url)
                counter_outside += 1
                continue
            # The image name is the last component of its URL.
            image_name = image_url.rpartition("/")[2]
            if known_pic(image_content):
                continue
            store_image(path, image_name, image_content)
def known_pic(content: bytes) -> bool:
    """Tell whether this image content was already seen.

    Contents are remembered by their sha256 fingerprint: a content seen for
    the first time is recorded and False is returned.
    """
    fingerprint = sha256(content).digest()
    already_seen = fingerprint in known_images
    if not already_seen:
        known_images.add(fingerprint)
    return already_seen
def scan_document(source: Path) -> list[int]:
    """Recursively parse the files under `source` to copy the ODF images.

    Prints a summary and returns the global counters as a list:
    [images copied, images not found, ODF files].
    """
    started = time.time()
    for candidate in source.glob("**/*"):
        if not candidate.is_file():
            continue
        parse_odf_pics(candidate)
    elapsed = time.time() - started
    summary = (
        f"{counter_image} images copied ({counter_outside} not found) from "
        f"{counter_odf} ODF files to {OUTPUT_DIR} in {elapsed:.2f}sec."
    )
    print(summary)
    return [counter_image, counter_outside, counter_odf]
def main() -> None:
    """Scan the directory given on the command line (default: the data
    directory) and check the resulting counters."""
    source = Path(sys.argv[1]) if len(sys.argv) > 1 else DATA
    counters = scan_document(source)
    test_unit(counters)
def test_unit(result: list[int]) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
assert result[0] == 15
assert result[1] == 0
assert result[2] == 19
if __name__ == "__main__":
main()
Read document from bytesio
Read a document from BytesIO.
recipes/read_document_from_bytesio.py
#!/usr/bin/env python
"""Read a document from BytesIO."""
import io
import os
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 600
DATA = Path(__file__).parent / "data"
SOURCE = "lorem.odt"
def document_from_bytesio() -> Document:
    """Return the sample Document, loaded through a BytesIO."""
    raw_content = (DATA / SOURCE).read_bytes()
    # The BytesIO is filled from a file here, it could as well be filled
    # from some network. A BytesIO built from bytes is positioned at its
    # start, ready to be read by odfdo.Document.
    with io.BytesIO(raw_content) as stream:
        document = Document(stream)
    return document
def main() -> None:
    """Load the sample document from a BytesIO and check it."""
    loaded = document_from_bytesio()
    test_unit(loaded)
def test_unit(document: Document) -> None:
    """Check that the expected text is found in the body (test suite only)."""
    if os.environ.get("ODFDO_TESTING") is None:
        return
    found = document.body.search("Lorem ipsum dolor sit amet")
    assert found is not None
if __name__ == "__main__":
main()
Save document as bytesio
Save a document as BytesIO.
recipes/save_document_as_bytesio.py
#!/usr/bin/env python
"""Save a document as BytesIO."""
import io
import os
from pathlib import Path
from odfdo import Document, Paragraph
_DOC_SEQUENCE = 605
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "bytes"
TARGET = "document.odt"
def make_document() -> Document:
    """Return a text document with a "Hello World" paragraph appended."""
    document = Document("text")
    document.body.append(Paragraph("Hello World"))
    return document
def document_to_bytesio(document: Document) -> bytes:
    """Save the document into a BytesIO and return the resulting bytes."""
    with io.BytesIO() as stream:
        document.save(stream)
        # Now the BytesIO can be used in some way.
        # In a network context, typically:
        #   response.write(stream.getvalue())
        saved_bytes = stream.getvalue()
    return saved_bytes
def write_content(content: bytes) -> None:
    """Write the bytes to the TARGET file of OUTPUT_DIR (created if missing)."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    (OUTPUT_DIR / TARGET).write_bytes(content)
def main() -> None:
    """Make a document, save it as bytes, check the bytes and write them."""
    hello_document = make_document()
    saved_bytes = document_to_bytesio(hello_document)
    test_unit(saved_bytes)
    write_content(saved_bytes)
def test_unit(content: bytes) -> None:
# only for test suite:
if "ODFDO_TESTING" not in os.environ:
return
with io.BytesIO() as bytes_content:
bytes_content.write(content)
bytes_content.seek(0)
document = Document(bytes_content)
assert document.body.search("Hello World") is not None
if __name__ == "__main__":
main()
Export tables to csv format
Export tables to CSV format.
recipes/export_tables_to_csv_format.py
#!/usr/bin/env python
"""Export tables to CSV format."""
import os
import sys
from pathlib import Path
from odfdo import Document
_DOC_SEQUENCE = 610
DATA = Path(__file__).parent / "data"
SOURCE = "two_sheets.ods"
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv"
def read_source_document() -> Document:
    """Open the document given as first command line argument, or the
    default sample from the data directory when no argument is given."""
    if len(sys.argv) > 1:
        source = sys.argv[1]
    else:
        source = DATA / SOURCE
    return Document(source)
def export_tables_to_csv(document: Document) -> None:
    """Write each table of the document to a "content_<index>.csv" file
    of OUTPUT_DIR."""
    index = 0
    for table in document.body.tables:
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        csv_path = OUTPUT_DIR / f"content_{index}.csv"
        # The default parameters produce an "excel" CSV format,
        # see the Python csv library for options.
        table.to_csv(csv_path)
        index += 1
def main() -> None:
    """Read the source spreadsheet, export its tables to CSV, then run the check."""
    spreadsheet = read_source_document()
    export_tables_to_csv(spreadsheet)
    test_unit(spreadsheet)
def test_unit(document: Document) -> None:
    """Compare the CSV text of the first two tables with the expected content.

    Does nothing unless ODFDO_TESTING is set in the environment.
    """
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    expected_by_table = (
        "col A,col B,col C\r\n1,2,3\r\na text,,another\r\n",
        ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n",
    )
    for position, expected in enumerate(expected_by_table):
        content = document.body.tables[position].to_csv()
        assert content == expected
# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()
Import csv content into a table
Import a CSV file and load data into a table.
recipes/import_csv_content_into_a_table.py
#!/usr/bin/env python
"""Import a CSV file and load data into a table."""
import os
import sys
from pathlib import Path
from odfdo import Document, Table
# NOTE(review): presumably an ordering index for the recipes documentation; confirm.
_DOC_SEQUENCE = 615
# Directory containing the sample input files.
DATA = Path(__file__).parent / "data"
# Default CSV input file, used when no path is given on the command line.
SOURCE = "some_csv.csv"
# Directory in which this recipe writes its result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "csv2"
# File name of the generated spreadsheet.
TARGET = "document.ods"
def save_new(document: Document, name: str) -> None:
    """Save the document as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def read_text_document() -> str:
    """Return the text of the file given on the command line, else of DATA / SOURCE."""
    if len(sys.argv) > 1:
        source_path = Path(sys.argv[1])
    else:
        source_path = Path(DATA / SOURCE)
    return source_path.read_text()
def import_csv() -> Document:
    """Build a spreadsheet whose only table, "Sheet name", holds the CSV content."""
    csv_text = read_text_document()
    spreadsheet = Document("ods")
    sheet = Table.from_csv(csv_text, "Sheet name")
    body = spreadsheet.body
    # Empty the body before adding the imported table.
    body.clear()
    body.append(sheet)
    return spreadsheet
def main() -> None:
    """Import the CSV into a spreadsheet, check it, then save it as TARGET."""
    spreadsheet = import_csv()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
def test_unit(document: Document) -> None:
    """Check the name and the CSV export of the first table.

    Does nothing unless ODFDO_TESTING is set in the environment.
    """
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        sheet = document.body.get_table(0)
        assert sheet.name == "Sheet name"
        expected_csv = ",,,\r\n,col B,col C,col D\r\n,1,2,3\r\n,a text,,another\r\n"
        assert sheet.to_csv() == expected_csv
# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()
Search and replace words
Search and replace words in a text document.
recipes/search_and_replace_words.py
#!/usr/bin/env python
"""Search and replace words in a text document."""
import os
import sys
from pathlib import Path
from odfdo import Document
# NOTE(review): presumably an ordering index for the recipes documentation; confirm.
_DOC_SEQUENCE = 700
# Directory containing the sample input files.
DATA = Path(__file__).parent / "data"
# Default input document, used when no path is given on the command line.
SOURCE = "lorem.odt"
# Directory in which this recipe writes its result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "replaced_text"
# File name of the modified document.
TARGET = "lorem_replaced.odt"
def read_source_document() -> Document:
    """Load the Document given as first command line argument, else DATA / SOURCE."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save the document as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def search_replace(document: Document) -> None:
    """Apply a series of replacements to the document body, in place."""
    content = document.body

    # Replace a string in the full document.
    content.replace("Lorem", "(Lorem replaced)")

    # Replace in paragraphs only.
    for para in content.paragraphs:
        para.replace("ipsum", "(ipsum in paragraph)")

    # Replace in headers.
    for heading in content.headers:
        heading.replace("ipsum", "(ipsum in header)")

    # The pattern is a regular expression.
    regex_replacements = (
        (r"\S+lit ", "(...lit) "),
        (r"pul[a-z]+", "(pulvinar)"),
    )
    for pattern, new_text in regex_replacements:
        content.replace(pattern, new_text)
def main() -> None:
    """Read the source document, apply the replacements, check, then save as TARGET."""
    text_document = read_source_document()
    search_replace(text_document)
    test_unit(text_document)
    save_new(text_document, TARGET)
def test_unit(document: Document) -> None:
    """Check the number of matches left in the body after the replacements.

    Does nothing unless ODFDO_TESTING is set in the environment.
    """
    # only for test suite:
    if "ODFDO_TESTING" in os.environ:
        content = document.body
        assert len(content.search_all("replaced")) == 3
        assert len(content.search_all("(pulvinar)")) == 2
# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()
Spreadsheet with words frequency from a text
Load an ODF text document, store the frequency of its words in a spreadsheet, then query the resulting table by regex or by value.
recipes/spreadsheet_with_words_frequency_from_a_text.py
#!/usr/bin/env python
"""Load an ODF text, store the frequency of words in a spreadsheet,
make requests on the table, by regex or value.
"""
import os
import sys
from pathlib import Path
from odfdo import Document, Table
# NOTE(review): presumably an ordering index for the recipes documentation; confirm.
_DOC_SEQUENCE = 710
# Default input document, used when no path is given on the command line.
SOURCE = "collection2.odt"
# Directory containing the sample input files.
DATA = Path(__file__).parent / "data"
# Directory in which this recipe writes its result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "freq"
# File name of the generated spreadsheet.
TARGET = "frequency.ods"
def read_source_document() -> Document:
    """Load the Document given as first command line argument, else DATA / SOURCE."""
    source = sys.argv[1] if len(sys.argv) > 1 else DATA / SOURCE
    return Document(source)
def save_new(document: Document, name: str) -> None:
    """Save the document as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def frequence_count(document: Document) -> dict[str, int]:
    """Word frequency analysis of a document.

    The body of the document is converted to text with ``str()``, a fixed set
    of punctuation characters is replaced by spaces, and the text is split on
    whitespace. Words are counted as they are (no case folding).

    Args:
        document: the document to analyse; ``document.container.path`` is
            only used to print the name of the file.

    Returns:
        A plain dict mapping each word to its number of occurrences, in
        order of first appearance in the text.
    """
    from collections import Counter

    print("Word frequency analysis of", Path(document.container.path).name)
    text = str(document.body)
    # Turn every separator into a space in a single pass over the text,
    # instead of one full str.replace() pass per separator.
    separators = "():;!.,[]{}#@/\\=-_+*`\"'"
    text = text.translate(str.maketrans(dict.fromkeys(separators, " ")))
    words = text.split()
    print("Nb of words:", len(words))
    # Counter keeps first-seen order; convert back to the documented dict type.
    frequences = dict(Counter(words))
    print("Unique words found:", len(frequences))
    return frequences
def scan_document(source: Document) -> Document:
    """Return a spreadsheet containing the word frequency of the source document.

    The table "Frequency Table" has one row per word (word, count), sorted by
    decreasing count. Two sample queries on the table are printed.
    """
    spreadsheet = Document("spreadsheet")
    frequences = frequence_count(source)

    # Populate the table in the spreadsheet.
    sheet_body = spreadsheet.body
    sheet_body.clear()
    table = Table("Frequency Table")
    sheet_body.append(table)

    # Rank the (word, count) pairs by count, then by word, highest first.
    ranking = sorted(
        frequences.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    # The rows could also be built one by one:
    #   for word, count in ranking:
    #       row = Row()
    #       row.set_value(0, word)
    #       row.set_value(1, count)  # Cell type is guessed.
    #       table.append_row(row)
    # Here the whole table is filled in one call:
    table.set_values(ranking)
    print("Rows in the table :", len(table.rows))

    # Frequency of the words matching a regex:
    regex_query = "^the"
    print("Words corresponding to the regex:", regex_query)
    for matching_row in table.get_rows(content=regex_query):
        print(f" word: {matching_row.get_value(0):<20} occurences: {matching_row.get_value(1)}")

    # List of the words of frequency 15:
    found = [word for word, freq in table.iter_values() if freq == 15]
    print("List of words of frequency 15:", ", ".join(found))
    return spreadsheet
def main() -> None:
    """Read the source text, build the frequency spreadsheet, check and save it."""
    text_document = read_source_document()
    frequency_sheet = scan_document(text_document)
    test_unit(frequency_sheet)
    save_new(frequency_sheet, TARGET)
def test_unit(freqs: Document) -> None:
    """Check a few cells of the first table of the frequency spreadsheet.

    Does nothing unless ODFDO_TESTING is set in the environment.
    """
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    table = freqs.body.get_table(0)
    expected_cells = {
        "A1": "the",
        "B1": 699,
        "A50": "which",
        "B50": 23,
    }
    for coordinate, expected in expected_cells.items():
        assert table.get_cell(coordinate).value == expected
# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()
Transpose table
Transpose a table. Create a spreadsheet table (for example: 50 rows and 20 columns), then create a new table in a separate sheet where the columns and rows are swapped (for example: 20 rows and 50 columns).
recipes/transpose_table.py
#!/usr/bin/env python
"""Transpose a table. Create a spreadsheet table (for example: 50 rows and
20 columns), then create a new table in a separate sheet where the columns
and rows are swapped (for example: 20 rows and 50 columns).
"""
import os
from pathlib import Path
from odfdo import Document, Row, Table
# NOTE(review): presumably an ordering index for the recipes documentation; confirm.
_DOC_SEQUENCE = 800
# Directory in which this recipe writes its result.
OUTPUT_DIR = Path(__file__).parent / "recipes_output" / "transpose"
# File name of the generated spreadsheet.
TARGET = "transposed.ods"
def save_new(document: Document, name: str) -> None:
    """Save the document as OUTPUT_DIR / name, creating the directory if needed."""
    destination = OUTPUT_DIR / name
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print("Saving:", destination)
    document.save(destination, pretty=True)
def generate_document() -> Document:
    """Return a spreadsheet with a table swapped by 2 different methods.

    The spreadsheet holds three sheets: the original "Table", "Symmetry"
    (built column by column) and "Transpose" (built with Table.transpose()).
    """
    spreadsheet = Document("spreadsheet")

    # Populate the table in the spreadsheet.
    body = spreadsheet.body
    body.clear()
    table = Table("Table")
    body.append(table)

    lines = 50
    cols = 20
    # Column letters "A", "B", ... used to label the cells ("A1", "B1", ...).
    letters = [chr(65 + index) for index in range(cols)]
    for line_number in range(1, lines + 1):
        row = Row()
        for column, letter in enumerate(letters):
            row.set_value(column, f"{letter}{line_number}")
        table.append(row)
    print(f"Size of Table : {table.size}")

    # Building the symmetric table using the classical method:
    # each column of the source becomes a row of the new table.
    symmetric = Table("Symmetry")
    for x in range(cols):
        symmetric.set_row_values(x, table.get_column_values(x))
    body.append(symmetric)
    print(f"Symmetrical table size 2 : {symmetric.size}")

    # A simpler solution with the table.transpose() method:
    transposed = table.clone
    transposed.transpose()
    transposed.name = "Transpose"
    body.append(transposed)
    print(f"Symmetrical table size 3 : {transposed.size}")

    return spreadsheet
def main() -> None:
    """Generate the spreadsheet, check it, then save it as TARGET."""
    spreadsheet = generate_document()
    test_unit(spreadsheet)
    save_new(spreadsheet, TARGET)
def test_unit(document: Document) -> None:
    """Check the size of the three tables of the generated spreadsheet.

    Does nothing unless ODFDO_TESTING is set in the environment.
    """
    # only for test suite:
    if "ODFDO_TESTING" not in os.environ:
        return
    body = document.body
    source_table = body.get_table(position=0)
    symmetric_table = body.get_table(position=1)
    transposed_table = body.get_table(position=2)
    assert source_table.size == (20, 50)
    assert symmetric_table.size == (50, 20)
    assert transposed_table.size == (50, 20)
# Run the recipe only when this file is executed as a script.
if __name__ == "__main__":
    main()