python-docx#

python-docx 是用于创建和更新 Microsoft Word (.docx) 文件的 Python 库。

安装很简单:

pip install python-docx

文档快速使用#

新建文档:

from docx import Document

document = Document()

添加一些东西:

document.add_heading("入门")
<docx.text.paragraph.Paragraph at 0x7f744c23aa20>

保存文档(目标目录 build/ 必须已存在,否则会像下面这样抛出 FileNotFoundError):

document.save('build/test.docx')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[3], line 1
----> 1 document.save('build/test.docx')

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/document.py:149, in Document.save(self, path_or_stream)
    143 def save(self, path_or_stream: str | IO[bytes]):
    144     """Save this document to `path_or_stream`.
    145 
    146     `path_or_stream` can be either a path to a filesystem location (a string) or a
    147     file-like object.
    148     """
--> 149     self._part.save(path_or_stream)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/parts/document.py:108, in DocumentPart.save(self, path_or_stream)
    105 def save(self, path_or_stream: str | IO[bytes]):
    106     """Save this document to `path_or_stream`, which can be either a path to a
    107     filesystem location (a string) or a file-like object."""
--> 108     self.package.save(path_or_stream)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/package.py:167, in OpcPackage.save(self, pkg_file)
    165 for part in self.parts:
    166     part.before_marshal()
--> 167 PackageWriter.write(pkg_file, self.rels, self.parts)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/pkgwriter.py:34, in PackageWriter.write(pkg_file, pkg_rels, parts)
     30 @staticmethod
     31 def write(pkg_file, pkg_rels, parts):
     32     """Write a physical package (.pptx file) to `pkg_file` containing `pkg_rels` and
     33     `parts` and a content types stream based on the content types of the parts."""
---> 34     phys_writer = PhysPkgWriter(pkg_file)
     35     PackageWriter._write_content_types_stream(phys_writer, parts)
     36     PackageWriter._write_pkg_rels(phys_writer, pkg_rels)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:109, in _ZipPkgWriter.__init__(self, pkg_file)
    107 def __init__(self, pkg_file):
    108     super(_ZipPkgWriter, self).__init__()
--> 109     self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/zipfile/__init__.py:1331, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps, metadata_encoding)
   1329 while True:
   1330     try:
-> 1331         self.fp = io.open(file, filemode)
   1332     except OSError:
   1333         if filemode in modeDict:

FileNotFoundError: [Errno 2] No such file or directory: 'build/test.docx'

打开已有文档(该文件必须已存在,否则会像下面这样抛出 PackageNotFoundError):

document = Document('build/test.docx')
document.save('build/new-file-name.docx')
---------------------------------------------------------------------------
PackageNotFoundError                      Traceback (most recent call last)
Cell In[4], line 1
----> 1 document = Document('build/test.docx')
      2 document.save('build/new-file-name.docx')

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/api.py:27, in Document(docx)
     20 """Return a |Document| object loaded from `docx`, where `docx` can be either a path
     21 to a ``.docx`` file (a string) or a file-like object.
     22 
     23 If `docx` is missing or ``None``, the built-in default document "template" is
     24 loaded.
     25 """
     26 docx = _default_docx_path() if docx is None else docx
---> 27 document_part = cast("DocumentPart", Package.open(docx).main_document_part)
     28 if document_part.content_type != CT.WML_DOCUMENT_MAIN:
     29     tmpl = "file '%s' is not a Word file, content type is '%s'"

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/package.py:127, in OpcPackage.open(cls, pkg_file)
    124 @classmethod
    125 def open(cls, pkg_file: str | IO[bytes]) -> OpcPackage:
    126     """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
--> 127     pkg_reader = PackageReader.from_file(pkg_file)
    128     package = cls()
    129     Unmarshaller.unmarshal(pkg_reader, package, PartFactory)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/pkgreader.py:22, in PackageReader.from_file(pkg_file)
     19 @staticmethod
     20 def from_file(pkg_file):
     21     """Return a |PackageReader| instance loaded with contents of `pkg_file`."""
---> 22     phys_reader = PhysPkgReader(pkg_file)
     23     content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
     24     pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)

File /opt/hostedtoolcache/Python/3.12.7/x64/lib/python3.12/site-packages/docx/opc/phys_pkg.py:21, in PhysPkgReader.__new__(cls, pkg_file)
     19         reader_cls = _ZipPkgReader
     20     else:
---> 21         raise PackageNotFoundError("Package not found at '%s'" % pkg_file)
     22 else:  # assume it's a stream and pass it to Zip reader to sort out
     23     reader_cls = _ZipPkgReader

PackageNotFoundError: Package not found at 'build/test.docx'

文档样式#

样式决定文档的外观。

document = Document()
styles = document.styles
styles
<docx.styles.styles.Styles at 0x7f744484f8f0>

文档分区#

Word 支持节(section,本文也称"分区")的概念,节是文档中具有相同页面布局设置(如页边距和页面方向)的部分。

document = Document()
sections = document.sections
sections
<docx.section.Sections at 0x7f744c9a7bc0>
len(sections)
1
section = sections[0]
section
<docx.section.Section at 0x7f7444850b00>

添加分区(start_type=2 对应 WD_SECTION.NEW_PAGE,即新分区从新的一页开始):

document.add_section(start_type=2)
<docx.section.Section at 0x7f744c3d0fb0>