python-docx
python-docx 是用于创建和更新 Microsoft Word (.docx) 文件的 Python 库。
安装很简单:
pip install python-docx
文档快速使用
新建文档:
from docx import Document
document = Document()
添加一些东西:
document.add_heading("入门")
<docx.text.paragraph.Paragraph at 0x7f744c23aa20>
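保存文档(注意:目标目录 build/ 必须已经存在,python-docx 不会自动创建目录;否则会像下面的输出一样抛出 FileNotFoundError。可以先执行 os.makedirs('build', exist_ok=True) 创建目录):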
document.save('build/test.docx')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[3], line 1
----> 1 document.save('build/test.docx')
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/document.py:149, in Document.save(self, path_or_stream)
143 def save(self, path_or_stream: str | IO[bytes]):
144 """Save this document to `path_or_stream`.
145
146 `path_or_stream` can be either a path to a filesystem location (a string) or a
147 file-like object.
148 """
--> 149 self._part.save(path_or_stream)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/parts/document.py:108, in DocumentPart.save(self, path_or_stream)
105 def save(self, path_or_stream: str | IO[bytes]):
106 """Save this document to `path_or_stream`, which can be either a path to a
107 filesystem location (a string) or a file-like object."""
--> 108 self.package.save(path_or_stream)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/package.py:167, in OpcPackage.save(self, pkg_file)
165 for part in self.parts:
166 part.before_marshal()
--> 167 PackageWriter.write(pkg_file, self.rels, self.parts)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/pkgwriter.py:34, in PackageWriter.write(pkg_file, pkg_rels, parts)
30 @staticmethod
31 def write(pkg_file, pkg_rels, parts):
32 """Write a physical package (.pptx file) to `pkg_file` containing `pkg_rels` and
33 `parts` and a content types stream based on the content types of the parts."""
---> 34 phys_writer = PhysPkgWriter(pkg_file)
35 PackageWriter._write_content_types_stream(phys_writer, parts)
36 PackageWriter._write_pkg_rels(phys_writer, pkg_rels)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:109, in _ZipPkgWriter.__init__(self, pkg_file)
107 def __init__(self, pkg_file):
108 super(_ZipPkgWriter, self).__init__()
--> 109 self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/zipfile/__init__.py:1331, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)
1329 while True:
1330 try:
-> 1331 self.fp = io.open(file, filemode)
1332 except OSError:
1333 if filemode in modeDict:
FileNotFoundError: [Errno 2] No such file or directory: 'build/test.docx'
打开已有文档(指定路径的文件必须存在;由于上一步保存失败,build/test.docx 并未生成,因此这里会抛出 PackageNotFoundError):
document = Document('build/test.docx')
document.save('build/new-file-name.docx')
---------------------------------------------------------------------------
PackageNotFoundError Traceback (most recent call last)
Cell In[4], line 1
----> 1 document = Document('build/test.docx')
2 document.save('build/new-file-name.docx')
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/api.py:27, in Document(docx)
20 """Return a |Document| object loaded from `docx`, where `docx` can be either a path
21 to a ``.docx`` file (a string) or a file-like object.
22
23 If `docx` is missing or ``None``, the built-in default document "template" is
24 loaded.
25 """
26 docx = _default_docx_path() if docx is None else docx
---> 27 document_part = cast("DocumentPart", Package.open(docx).main_document_part)
28 if document_part.content_type != CT.WML_DOCUMENT_MAIN:
29 tmpl = "file '%s' is not a Word file, content type is '%s'"
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/package.py:127, in OpcPackage.open(cls, pkg_file)
124 @classmethod
125 def open(cls, pkg_file: str | IO[bytes]) -> OpcPackage:
126 """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
--> 127 pkg_reader = PackageReader.from_file(pkg_file)
128 package = cls()
129 Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/pkgreader.py:22, in PackageReader.from_file(pkg_file)
19 @staticmethod
20 def from_file(pkg_file):
21 """Return a |PackageReader| instance loaded with contents of `pkg_file`."""
---> 22 phys_reader = PhysPkgReader(pkg_file)
23 content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
24 pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:21, in PhysPkgReader.__new__(cls, pkg_file)
19 reader_cls = _ZipPkgReader
20 else:
---> 21 raise PackageNotFoundError("Package not found at '%s'" % pkg_file)
22 else: # assume it's a stream and pass it to Zip reader to sort out
23 reader_cls = _ZipPkgReader
PackageNotFoundError: Package not found at 'build/test.docx'
文档样式
样式决定文档的外观。
document = Document()
styles = document.styles
styles
<docx.styles.styles.Styles at 0x7f744484f8f0>
文档分区
Word 支持“节”(section,即本文所说的“分区”)的概念,节是文档中具有相同页面布局设置(如页边距和页面方向)的部分。
document = Document()
sections = document.sections
sections
<docx.section.Sections at 0x7f744c9a7bc0>
len(sections)
1
section = sections[0]
section
<docx.section.Section at 0x7f7444850b00>
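添加分区(start_type=2 对应枚举值 WD_SECTION.NEW_PAGE,表示新的节从新的一页开始;建议在代码中使用 docx.enum.section.WD_SECTION 的枚举成员而不是数字字面量):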
document.add_section(start_type=2)
<docx.section.Section at 0x7f744c3d0fb0>