问题:如何使用 win32com 批量将 Word 文档转换成 HTML
方法:
# -*- coding: utf-8 -*-
import sys
import platform
import time
# The rest of this script imports win32com and automates Word, so stop early
# on any platform other than Windows.
current_os = platform.system()
if current_os != 'Windows':
    print(u"本脚本只能在Windows下运行")
    sys.exit()
import os
import win32com.client as w32c
# Convert every Word document found under the current directory (recursively)
# to an HTML file saved next to the original as <name>.htm.

# File extensions treated as Word documents (compared case-insensitively).
WORD_EXTENSIONS = (".doc", ".docx")
# WdSaveFormat.wdFormatFilteredHTML -- see the WdSaveFormat enumeration.
WD_FORMAT_FILTERED_HTML = 10
# WdSaveOptions.wdDoNotSaveChanges -- close without saving the source again.
WD_DO_NOT_SAVE_CHANGES = 0

wa = w32c.gencache.EnsureDispatch('Word.Application')
# Alternative way to obtain the application object:
#     wa = w32c.Dispatch('Word.Application')
# Set wa.Visible = True to watch Word while debugging.
try:
    file_list = []
    for root, subdir, files in os.walk("."):
        # Build absolute paths so they do not depend on whatever directory
        # Word itself resolves relative names against.
        abs_root = os.path.realpath(root)
        for name in files:
            # Names starting with "~" are skipped (e.g. Word's "~$..." lock
            # files that sit beside an open document).
            if name.startswith("~"):
                continue
            if name.lower().endswith(WORD_EXTENSIONS):
                file_list.append(os.path.join(abs_root, name))

    for fn in file_list:
        print(fn)
        # NOTE: "a.doc" and "a.docx" in the same folder map to the same
        # "a.htm" output name.
        hn = "%s.htm" % os.path.splitext(fn)[0]
        doc = wa.Documents.Open(fn)
        try:
            doc.SaveAs(FileName=hn, FileFormat=WD_FORMAT_FILTERED_HTML)
        finally:
            # Always release the document, even when the save failed, so it
            # does not stay open inside the Word instance.
            doc.Close(SaveChanges=WD_DO_NOT_SAVE_CHANGES)
            del doc
        # Short pause between documents, kept from the original script.
        time.sleep(0.2)

    print("")
    print(u"敲任何键结束")
    # input() returns once the user presses Enter.
    input()
finally:
    # Shut Word down on every exit path; otherwise a failed conversion would
    # leave the Word process started by this script running.
    wa.Quit()