本文使用的是 POI 3.17 版本。需要注意:Word 转 HTML 和 Excel 转 HTML 这两个功能,一个依赖 3.17 的 jar 包,另一个依赖 3.14 的某个 jar 包,两者同时引入会产生冲突;具体是哪个 jar 包冲突,当时没有记录,现在已经记不清了。
jar包可以自己去搜:点击打开链接
或者去下载完整的:poi3.17等jar包
1、Excel07转HTML
/**
 * Converts an Excel 2007+ workbook (.xlsx) into one GBK-encoded HTML file.
 *
 * <p>Each sheet is rendered as a banner line with the sheet name followed by a
 * bordered table. The first row of the sheet is emitted as header cells
 * ({@code <th>}); every following row is emitted as data cells ({@code <td>}).
 * Cell text and sheet names are HTML-escaped.
 *
 * @param filename path of the .xlsx file to read
 * @param htmlpath directory the HTML file is written to
 * @param htmlname file name of the generated HTML file
 * @throws Exception if the workbook cannot be read or the HTML file cannot be written
 */
public static void ExcelToHtml(String filename, String htmlpath,
        String htmlname) throws Exception {
    StringBuilder html = new StringBuilder();
    try (InputStream is = new FileInputStream(filename);
            XSSFWorkbook workbook = new XSSFWorkbook(is)) {
        for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
            Sheet sheet = workbook.getSheetAt(numSheet);
            if (sheet == null) {
                continue;
            }
            html.append("=======================")
                    .append(escapeExcelHtml(sheet.getSheetName()))
                    .append("=========================<br><br>");
            html.append("<table border='1' align='left'>");
            int firstRowIndex = sheet.getFirstRowNum();
            int lastRowIndex = sheet.getLastRowNum();
            // Header row: the first populated row of the sheet (null for an empty sheet).
            appendExcelRow(html, sheet.getRow(firstRowIndex), "th");
            // Data rows.
            for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
                appendExcelRow(html, sheet.getRow(rowIndex), "td");
            }
            html.append("</table>");
        }
    }
    // Written once, after every sheet has been rendered.
    FileUtils.writeStringToFile(new File(htmlpath, htmlname), html.toString(),
            "gbk");
}

/** Appends one table row, using the given cell tag ("th" or "td") for every cell. */
private static void appendExcelRow(StringBuilder html, Row row, String cellTag) {
    html.append("<tr>");
    // getFirstCellNum() is -1 for a row that contains no cells.
    if (row == null || row.getFirstCellNum() < 0) {
        html.append(" ");
    } else {
        // getLastCellNum() is the index of the last cell PLUS ONE, hence the strict '<'.
        int endExclusive = row.getLastCellNum();
        for (int col = row.getFirstCellNum(); col < endExclusive; col++) {
            html.append('<').append(cellTag).append('>')
                    .append(escapeExcelHtml(getCellValue(row.getCell(col), true)))
                    .append("</").append(cellTag).append('>');
        }
    }
    html.append("</tr>");
}

/** Escapes the characters that would otherwise be interpreted as HTML markup. */
private static String escapeExcelHtml(String text) {
    if (text == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char ch = text.charAt(i);
        switch (ch) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            default:
                escaped.append(ch);
        }
    }
    return escaped.toString();
}
/**
 * Returns the content of a cell as text.
 *
 * <p>Formula cells are rendered using the type of their cached result, so a
 * formula that evaluates to a number or boolean no longer fails in
 * {@code getStringCellValue()}. Blank cells, error cells and {@code null}
 * yield an empty string.
 *
 * @param cell       the cell to read; may be {@code null}
 * @param treatAsStr retained for interface compatibility; it does not alter the result
 * @return the cell content as text, never {@code null}
 */
private static String getCellValue(Cell cell, boolean treatAsStr) {
    if (cell == null) {
        return "";
    }
    CellType type = cell.getCellTypeEnum();
    if (type == CellType.FORMULA) {
        // Read the cached result with the accessor that matches its actual type.
        type = cell.getCachedFormulaResultTypeEnum();
    }
    if (type == CellType.BOOLEAN) {
        return String.valueOf(cell.getBooleanCellValue());
    }
    if (type == CellType.NUMERIC) {
        return String.valueOf(cell.getNumericCellValue());
    }
    if (type == CellType.STRING) {
        return String.valueOf(cell.getStringCellValue());
    }
    // BLANK and ERROR cells have no text to show.
    return "";
}
}
2、Excel03版本转HTML
/**
 * Converts an Excel 97-2003 workbook (.xls) to a GBK-encoded HTML file using
 * POI's {@code ExcelToHtmlConverter}. Column headers and row numbers are
 * suppressed in the output. Pictures stored in the workbook are written next
 * to the HTML file as {@code image<N>.<ext>}.
 *
 * @param wordpath path of the .xls file to read (parameter name kept for compatibility)
 * @param htmlpath directory the HTML file and the extracted pictures are written to
 * @param htmlname file name of the generated HTML file
 * @throws Throwable if the workbook cannot be read, converted or written
 */
public static void excelToHtml(String wordpath, String htmlpath,
        String htmlname) throws Throwable {
    try (InputStream input = new FileInputStream(wordpath);
            HSSFWorkbook excelBook = new HSSFWorkbook(input)) {
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // These switches are read while the workbook is processed, so they
        // must be set BEFORE processWorkbook() to have any effect.
        excelToHtmlConverter.setOutputColumnHeaders(false);
        excelToHtmlConverter.setOutputRowNumbers(false);
        excelToHtmlConverter.processWorkbook(excelBook);

        // HSSFWorkbook.getAllPictures() yields HSSFPictureData (raw bytes plus a
        // suggested extension), not the HWPF Picture type used for Word files.
        List<org.apache.poi.hssf.usermodel.HSSFPictureData> pics = excelBook.getAllPictures();
        if (pics != null) {
            for (int i = 0; i < pics.size(); i++) {
                org.apache.poi.hssf.usermodel.HSSFPictureData pic = pics.get(i);
                File target = new File(htmlpath,
                        "image" + (i + 1) + "." + pic.suggestFileExtension());
                try (FileOutputStream pictureOut = new FileOutputStream(target)) {
                    pictureOut.write(pic.getData());
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "gbk");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        // The serializer produced GBK bytes; decode them as GBK rather than
        // with the platform default charset.
        String content = outStream.toString("gbk");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "gbk");
    }
}
3、Word07转HTML
/**
 * Converts a Word 2007+ document (.docx) to XHTML for online preview.
 *
 * <p>This no-argument form keeps the original behaviour: all three paths are
 * empty placeholders, so it reports that the file does not exist. Use
 * {@link #PoiWord07ToHtml(String, String, String)} to convert a real file.
 *
 * @throws IOException if reading the document or writing the HTML fails
 */
public static void PoiWord07ToHtml() throws IOException {
    PoiWord07ToHtml("", "", "");
}

/**
 * Converts a Word 2007+ document (.docx) to XHTML.
 *
 * <p>Image links in the generated page are resolved against {@code "./"}.
 * NOTE(review): for the links to resolve, {@code imageDir} presumably has to
 * be the directory that holds {@code htmlFile} - confirm against callers.
 *
 * @param docxFile path of the .docx file to read
 * @param imageDir directory embedded images are extracted into
 * @param htmlFile path of the HTML file to write
 * @throws IOException if reading the document or writing the HTML fails
 */
public static void PoiWord07ToHtml(String docxFile, String imageDir,
        String htmlFile) throws IOException {
    File source = new File(docxFile);
    if (!source.exists()) {
        System.out.println("Sorry File does not Exists!");
        return;
    }
    String name = source.getName();
    // Case-insensitive suffix check so that ".Docx" and similar are accepted too.
    boolean isDocx = name.regionMatches(true, name.length() - 5, ".docx", 0, 5);
    if (!isDocx) {
        System.out.println("Enter only MS Office 2007+ files");
        return;
    }
    try (InputStream in = new FileInputStream(source);
            XWPFDocument document = new XWPFDocument(in);
            OutputStream out = new FileOutputStream(new File(htmlFile))) {
        // Base URI used for image links inside the generated HTML.
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("./"));
        // Folder the embedded images are saved to.
        options.setExtractor(new FileImageExtractor(new File(imageDir)));
        XHTMLConverter.getInstance().convert(document, out, options);
    }
}
4、Word03转HTML
/**
 * Converts a Word 97-2003 document (.doc) to a GBK-encoded HTML file using
 * POI's {@code WordToHtmlConverter}.
 *
 * <p>Embedded pictures are written into {@code htmlpath} with the HTML file
 * name as prefix, and the generated {@code <img>} links use that same
 * prefixed name so they resolve relative to the HTML file.
 *
 * @param wordpath path of the .doc file to read
 * @param htmlpath directory the HTML file and the extracted pictures are written to
 * @param htmlname file name of the generated HTML file
 * @throws Throwable if the document cannot be read, converted or written
 */
public static void wordToHtml(String wordpath, String htmlpath,
        String htmlname) throws Throwable {
    // Prefix shared by the stored picture files and the links that point at them.
    final String imagePrefix = htmlname;
    try (InputStream input = new FileInputStream(wordpath);
            HWPFDocument wordDocument = new HWPFDocument(input)) {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // Must match the file name used when the pictures are written below.
                return imagePrefix + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                File target = new File(htmlpath, imagePrefix + pic.suggestFullFileName());
                try (FileOutputStream pictureOut = new FileOutputStream(target)) {
                    pic.writeImageContent(pictureOut);
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "gbk");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        // The serializer produced GBK bytes; decode them as GBK rather than
        // with the platform default charset.
        String content = outStream.toString("gbk");
        FileUtils.writeStringToFile(new File(htmlpath, htmlname), content,
                "gbk");
    }
}
5、ppt07转HTML
/**
 * Renders every slide of a PowerPoint 2007+ presentation (.pptx) to a PNG
 * image and writes a UTF-8 HTML page that shows the images in order.
 *
 * <p>Image files are named {@code path + imgname + <slide number> + ".png"};
 * the HTML page is written to {@code htmlname} inside {@code path}.
 *
 * @param path     output directory; it is concatenated directly with {@code imgname},
 *                 so it needs a trailing separator
 * @param infile   path of the .pptx file to read
 * @param htmlname file name of the generated HTML page
 * @param imgname  file-name prefix of the generated slide images
 * @throws IOException if the presentation cannot be read or an output file cannot be written
 */
public static void pptToHtml(String path, String infile, String htmlname,
        String imgname) throws IOException {
    StringBuilder imghtml = new StringBuilder();
    try (FileInputStream in = new FileInputStream(new File(infile));
            XMLSlideShow ppt = new XMLSlideShow(in)) {
        // Slide dimensions decide the size of every rendered image.
        Dimension pgsize = ppt.getPageSize();
        List<XSLFSlide> slides = ppt.getSlides();
        for (int i = 0; i < slides.size(); i++) {
            XSLFSlide current = slides.get(i);
            // Force a CJK-capable font so Chinese text is not rendered as garbage.
            for (XSLFShape shape : current.getShapes()) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape tsh = (XSLFTextShape) shape;
                    for (XSLFTextParagraph p : tsh) {
                        for (XSLFTextRun r : p) {
                            r.setFontFamily("宋体");
                        }
                    }
                }
            }
            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                current.draw(graphics);
            } finally {
                graphics.dispose();
            }
            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src='")
                    .append(imgs)
                    .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
            // One stream per slide, closed as soon as the image is written.
            try (FileOutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + imghtml + "</body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");
    System.out.println("Image successfully created");
}
6、ppt03转HTML
/**
 * Renders every slide of a PowerPoint 97-2003 presentation (.ppt) to a PNG
 * image and writes a UTF-8 HTML page that shows the images in order.
 *
 * <p>Image files are named {@code path + imgname + <slide number> + ".png"};
 * the HTML page is written to {@code htmlname} inside {@code path}.
 *
 * @param path     output directory; it is concatenated directly with {@code imgname},
 *                 so it needs a trailing separator
 * @param infile   path of the .ppt file to read
 * @param htmlname file name of the generated HTML page
 * @param imgname  file-name prefix of the generated slide images
 * @throws IOException if the presentation cannot be read or an output file cannot be written
 */
public static void pptTohTML(String path, String infile, String htmlname,
        String imgname) throws IOException {
    StringBuilder imghtml = new StringBuilder();
    try (FileInputStream in = new FileInputStream(new File(infile));
            HSLFSlideShow ppt = new HSLFSlideShow(in)) {
        // Slide dimensions decide the size of every rendered image.
        Dimension pgsize = ppt.getPageSize();
        List<HSLFSlide> slides = ppt.getSlides();
        for (int i = 0; i < slides.size(); i++) {
            HSLFSlide current = slides.get(i);
            // Force a CJK-capable font so Chinese text is not rendered as garbage.
            for (HSLFShape shape : current.getShapes()) {
                if (shape instanceof HSLFTextShape) {
                    HSLFTextShape tsh = (HSLFTextShape) shape;
                    for (HSLFTextParagraph p : tsh) {
                        for (HSLFTextRun r : p) {
                            r.setFontFamily("宋体");
                        }
                    }
                }
            }
            BufferedImage img = new BufferedImage(pgsize.width,
                    pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Clear the drawing area, then render the slide onto it.
                graphics.setPaint(Color.white);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width,
                        pgsize.height));
                current.draw(graphics);
            } finally {
                graphics.dispose();
            }
            String imgs = path + imgname + (i + 1) + ".png";
            imghtml.append("<img src='")
                    .append(imgs)
                    .append("' style='width:80%;vertical-align:text-bottom; ' border='1'><br><br><br><br>");
            // One stream per slide, closed as soon as the image is written.
            try (FileOutputStream out = new FileOutputStream(imgs)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
        }
    }
    // The wrapper div is centred through CSS and properly closed.
    String ppthtml = "<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"
            + "<div style='text-align:center'>"
            + imghtml + "</div></body></html>";
    FileUtils.writeStringToFile(new File(path, htmlname), ppthtml,
            "utf-8");
    System.out.println("Image successfully created");
}
7、TXT转HTML
/**
 * Converts a GBK-encoded plain-text file into a GBK-encoded HTML fragment:
 * every input line is HTML-escaped and followed by a {@code <br>} tag.
 *
 * <p>Failures are reported on standard output and not propagated. If the
 * input is not an existing regular file, nothing is written.
 *
 * @param filePath     path of the text file to read
 * @param htmlPosition path of the HTML file to write
 */
public static void txtToHtml(String filePath, String htmlPosition) {
    try {
        String encoding = "GBK";
        File file = new File(filePath);
        if (!file.isFile()) { // isFile() is false when the path does not exist
            System.out.println("找不到指定的文件");
            return;
        }
        // try-with-resources closes every stream even when reading or writing fails.
        try (FileInputStream fis = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(new InputStreamReader(fis, encoding));
                FileOutputStream fos = new FileOutputStream(new File(htmlPosition));
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, "gbk"))) {
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                writer.write(escapeTxtHtml(lineTxt));
                writer.write("<br>");
            }
        }
    } catch (Exception e) {
        // Best-effort contract kept from the original: report and return.
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}

/** Escapes the characters that would otherwise be interpreted as HTML markup. */
private static String escapeTxtHtml(String text) {
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char ch = text.charAt(i);
        switch (ch) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            default:
                escaped.append(ch);
        }
    }
    return escaped.toString();
}