# Method 1: convert the single Word file named on the command line to a .txt file
import sys
import os
from win32com import client as wc
# Convert the Word document named by the first command-line argument into a
# text file with the same base name, saved next to the source document.
print(sys.version)

if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit('usage: %s <word-file>' % sys.argv[0])

# abspath resolves relative names against the current working directory and
# leaves absolute paths untouched.
openfile = os.path.abspath(sys.argv[1])
print(openfile)

# splitext strips only the final extension, so "my.report.doc" becomes
# "my.report.txt" rather than "my.txt".
savefile = os.path.splitext(openfile)[0] + '.txt'
print(savefile)

# WdSaveFormat value passed to SaveAs; 4 is wdFormatDOSText in the Word
# object model.
WD_FORMAT_DOS_TEXT = 4

word = wc.Dispatch('Word.Application')
try:
    doc = word.Documents.Open(openfile)
    try:
        doc.SaveAs(savefile, WD_FORMAT_DOS_TEXT)
    finally:
        doc.Close()
finally:
    # Always shut Word down, even when opening or saving fails, so no
    # background Word process is left running.
    word.Quit()
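# Method 2: helpers that convert the .doc/.docx files in the current directory and zip the resulting .txt files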
# A script that converts Word files (.doc/.docx) to text files.
# It requires the Word application on a Windows machine.
# Requirements:
# 1. Windows platform
# 2. Python 2.7
# 3. pywin32, downloadable from http://sourceforge.net/projects/pywin32/
# 4. Word application installed on the machine running the script
from win32com.client import constants, Dispatch
import pythoncom
import glob
import os
from zipfile import ZipFile
def convert_to_text(wordapp, wordfile):
    """Convert a Word document to a text file with the same base name.

    Args:
        wordapp: The Word IDispatch application object.
        wordfile: The Word file name; its extension must be ``.doc`` or
            ``.docx`` (compared case-insensitively).

    Returns:
        The text file name (``wordfile`` with its extension replaced by
        ``.txt``), or ``None`` when ``wordfile`` is not a Word document.
    """
    name, ext = os.path.splitext(wordfile)
    # Case-insensitive so that names such as "REPORT.DOC" are converted
    # instead of being silently skipped.
    if ext.lower() not in ('.doc', '.docx'):
        return None

    txtfile = name + '.txt'
    print(txtfile)

    # WdSaveFormat value passed to SaveAs for the text export.
    wdFormatTextLineBreaks = 3

    # Work on the document object returned by Open rather than on
    # ActiveDocument, and close it even if saving fails.
    doc = wordapp.Documents.Open(os.path.abspath(wordfile))
    try:
        doc.SaveAs(os.path.abspath(txtfile),
                   FileFormat=wdFormatTextLineBreaks)
    finally:
        doc.Close()
    return txtfile
def next_doc():
    """Yield every Word file name in the current working directory.

    All ``*.doc`` matches are yielded first, followed by all ``*.docx``
    matches. Each group is sorted by name so the iteration order does not
    depend on the order in which the file system lists entries.
    """
    for pattern in ('*.doc', '*.docx'):
        for path in sorted(glob.glob(pattern)):
            yield path
def convert_and_zip(zipfilename):
    """Convert all doc/docx files and zip the resulting txt files.

    Starts a Word application, converts every file yielded by
    ``next_doc()`` with ``convert_to_text()``, and adds each text file that
    was produced to a new archive.

    Args:
        zipfilename: Name of the zip archive to create (opened in ``'w'``
            mode).
    """
    word = Dispatch("Word.Application")
    try:
        with ZipFile(zipfilename, 'w') as fzip:
            for doc in next_doc():
                print('converting  %s ...' % doc)
                txtfile = convert_to_text(word, doc)
                # convert_to_text returns None for files it skipped.
                if txtfile:
                    fzip.write(txtfile)
    finally:
        # Always shut Word down, even when a conversion or a zip write
        # fails, so no background Word process is left running.
        word.Quit()