linux下docx转pdf

本文共有2926个字,关键词:

问题:Ubuntu 下如何将 doc/docx 文档转换为 pdf?

解决:使用 LibreOffice 的无界面(headless)命令行模式,其中 --outdir 指定的是输出目录:

libreoffice --headless --convert-to pdf /home/qc/a.docx --outdir /home/qc

方法:
下面是一个 Python 脚本:在 Windows 下通过 Word 的 COM 接口转换,在 Linux 下调用 libreoffice 转换

# -*- coding: utf-8 -*-
"""
linux platform word to pdf
python3.9
"""
import subprocess
import os

try:
    from comtypes import client
except ImportError:
    client = None

try:
    from win32com.client import constants, gencache
except ImportError:
    constants = None
    gencache = None
def doc2pdf_linux(docPath, pdfPath):
    """
    Convert a doc/docx document to PDF with LibreOffice (Linux only).

    Runs the equivalent of::

        libreoffice --headless --convert-to pdf <docPath> --outdir <pdfPath>

    :param docPath: path to the source .doc/.docx document
    :param pdfPath: output directory, passed to ``--outdir``
    :raises subprocess.TimeoutExpired: if the conversion runs longer than
        30 seconds (the child process is killed first)
    :raises subprocess.CalledProcessError: if libreoffice exits with a
        non-zero status; captured stdout/stderr are attached to the exception
    :raises OSError: if the ``libreoffice`` executable cannot be started
    """
    cmd = ['libreoffice', '--headless', '--convert-to', 'pdf',
           docPath, '--outdir', pdfPath]
    # subprocess.run() reads both pipes while it waits, so a chatty child can
    # never block on a full pipe (wait() followed by communicate() could), and
    # it kills the child if the timeout expires instead of leaving it running.
    # Output on stderr alone is deliberately NOT treated as a failure; only
    # the exit status is checked.
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                   timeout=30, check=True)
def doc2pdf(docPath, pdfPath):
    """
    Convert a doc/docx document to PDF.

    When the Windows COM bindings are unavailable (always the case on Linux)
    the work is delegated to ``doc2pdf_linux``; otherwise Microsoft Word is
    driven through COM.

    :param docPath: path to the source document; it is made absolute first
    :param pdfPath: on the LibreOffice path this is the output *directory*
        (forwarded unchanged to ``doc2pdf_linux``); on the Word path it is the
        output PDF *file* path handed to ``ExportAsFixedFormat``
    """
    # Word resolves relative paths against its own working directory
    # (windows/system32), so always hand it absolute paths.
    docPathTrue = os.path.abspath(docPath)
    # The Word branch needs win32com's gencache/constants, so fall back to
    # LibreOffice when either COM import failed, not only when comtypes did.
    if client is None or gencache is None or constants is None:
        return doc2pdf_linux(docPathTrue, pdfPath)
    word = gencache.EnsureDispatch('Word.Application')
    try:
        doc = word.Documents.Open(docPathTrue, ReadOnly=1)
        try:
            doc.ExportAsFixedFormat(os.path.abspath(pdfPath),
                                    constants.wdExportFormatPDF,
                                    Item=constants.wdExportDocumentWithMarkup,
                                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Release the document even if the export failed.
            doc.Close(constants.wdDoNotSaveChanges)
    finally:
        # Always shut Word down so a failed conversion does not leave a
        # WINWORD process behind.
        word.Quit(constants.wdDoNotSaveChanges)
if __name__ == '__main__':
    # Demo invocation: convert one sample document using the paths below.
    source_document, pdf_destination = '/home/qc/a.docx', '/home/qc'
    doc2pdf(source_document, pdf_destination)

Linux 环境中的异步处理(基于 asyncio 的子进程接口):

import os
import asyncio
async def DOC2PDF(docPath, pdfPath):
    """
    Asynchronously convert a doc/docx document to PDF with LibreOffice.

    Runs the equivalent of::

        libreoffice --headless --convert-to pdf <docPath> --outdir <pdfPath>

    :param docPath: path to the source .doc/.docx document
    :param pdfPath: output directory, passed to ``--outdir``
    :raises subprocess.CalledProcessError: if libreoffice exits with a
        non-zero status
    :raises subprocess.SubprocessError: if libreoffice wrote anything to
        stderr (the raw stderr bytes are the exception argument)
    """
    # Imported here because this snippet's import block does not bring in
    # subprocess, and the exception classes below live in that module.
    import subprocess

    cmd = ['libreoffice', '--headless', '--convert-to', 'pdf',
           docPath, '--outdir', pdfPath]
    proc = await asyncio.create_subprocess_exec(
        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
    # communicate() both drains the pipes and waits for exit. Awaiting wait()
    # first can deadlock once the child fills a pipe buffer.
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd,
                                            output=stdout, stderr=stderr)
    if stderr:
        raise subprocess.SubprocessError(stderr)

参考:

https://www.cnblogs.com/mrtop/p/11995974.html
https://zhuanlan.zhihu.com/p/140363567
版权声明:本文为作者原创,如需转载须联系作者本人同意,未经作者本人同意不得擅自转载。
添加新评论
暂无评论