java工具类实现word文档读取字段的赋值

package com.e369.elc.helper.hpia.contract;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.io.IOUtils;
import org.apache.poi.xwpf.usermodel.*;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.*;

import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DocxUtil {

  private static ObjectFactory factory = new ObjectFactory();
  /**
   * 合并docx
   * @param streams
   * @return
   * @throws Docx4JException
   * @throws IOException
   */
  /**
   * 合并docx
   *
   * @param streams 要合并的word文件的输入流
   * @param path 合并后的文件的路径
   */
  public static File mergeDocx(final List<InputStream> streams, String path) throws Docx4JException, IOException {

    WordprocessingMLPackage target = null;
    final File generated = new File(path);

    int chunkId = 0;
    Iterator<InputStream> it = streams.iterator();
    while (it.hasNext()) {
      InputStream is = it.next();
      if (is != null) {
        try {
          if (target == null) {
            // Copy first (master) document
            OutputStream os = new FileOutputStream(generated);
            os.write(IOUtils.toByteArray(is));
            os.close();

            target = WordprocessingMLPackage.load(generated);
          } else {
            MainDocumentPart documentPart = target.getMainDocumentPart();

            addPageBreak(documentPart); // 另起一页，换页

            insertDocx(documentPart,
                IOUtils.toByteArray(is), chunkId++);
          }
        } catch (Exception e) {
          e.printStackTrace();
        } finally {
          is.close();
        }
      }
    }

    if (target != null) {
      target.save(generated);
//            Docx4J.save(target, generated, Docx4J.FLAG_NONE);
      return generated;
    } else {
      return null;
    }
  }


  // 插入文档
  private static void insertDocx(MainDocumentPart main, byte[] bytes, int chunkId) {
    try {
      AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(
          new PartName("/part" + chunkId + ".docx"));
      // afiPart.setContentType(new ContentType(CONTENT_TYPE));
      afiPart.setBinaryData(bytes);
      Relationship altChunkRel = main.addTargetPart(afiPart);

      CTAltChunk chunk = Context.getWmlObjectFactory().createCTAltChunk();
      chunk.setId(altChunkRel.getId());

      main.addObject(chunk);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * wordML转word，原文件不变，返回转换完成的word文件对象。
   */
  public static File wordMLToWord(File file) throws Docx4JException, IOException {
    WordprocessingMLPackage target = WordprocessingMLPackage.load(file);
    File temp = File.createTempFile(file.getName(), ".doc");
    target.save(temp);
    return temp;
  }

  /**
   * 合并wordML文档
   */
  public static void mergeWordML(List<File> list, String path) throws Docx4JException, IOException {
    final List<InputStream> streams = new ArrayList<InputStream>();
    for (int i = 0; i < list.size(); i++) {
      File file = list.get(i);
      file = DocxUtil.wordMLToWord(file); // wordML转word
      streams.add(new FileInputStream(file));
    }
    DocxUtil.mergeDocx(streams, path);
  }

  /**
   * 把文件转换成Byte[] Mapped File way MappedByteBuffer 可以在处理大文件时，提升性能
   */
  public static byte[] fileToByteArray(String filename) throws IOException {

    RandomAccessFile raf = null;
    FileChannel fc = null;
    try {
      raf = new RandomAccessFile(filename, "r");
      fc = raf.getChannel();
      MappedByteBuffer byteBuffer = fc.map(FileChannel.MapMode.READ_ONLY, 0,
          fc.size()).load();
      System.out.println(byteBuffer.isLoaded());
      byte[] result = new byte[(int) fc.size()];
      if (byteBuffer.remaining() > 0) {
        byteBuffer.get(result, 0, byteBuffer.remaining());
      }
      return result;
    } catch (IOException e) {
      e.printStackTrace();
      throw e;
    } finally {
      try {
        fc.close();
        raf.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中, 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数.
   * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件.
   *
   * @param word 需要编辑的文件
   * @param imageList 图片对象集合（ 图片对象属性： url            图片文件路径 keyword     文档中的图片占位符 name        图片文件名 ）
   * @throws Exception 不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型)
   */
  public static void addImageToPackage(File word,
      List<JSONObject> imageList) throws Exception {

    WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);

    for (int i = 0; i < imageList.size(); i++) {
      JSONObject image = imageList.get(i);

      byte[] bytes = fileToByteArray(image.getString("url"));

      BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.
          createImagePart(wordMLPackage, bytes);

      int docPrId = 1;
      int cNvPrId = 2;
      Inline inline = imagePart.createImageInline(image.getString("name"),
          image.getString("keyword"), docPrId, cNvPrId, false);

      P paragraph = addInlineImageToParagraph(inline);

      wordMLPackage.getMainDocumentPart().addObject(paragraph);
    }

    wordMLPackage.save(word);
  }

  /**
   * Docx4j拥有一个由字节数组创建图片部件的工具方法, 随后将其添加到给定的包中. 为了能将图片添加 到一个段落中, 我们需要将图片转换成内联对象. 这也有一个方法, 方法需要文件名提示, 替换文本, 两个id标识符和一个是嵌入还是链接到的指示作为参数.
   * 一个id用于文档中绘图对象不可见的属性, 另一个id用于图片本身不可见的绘制属性. 最后我们将内联 对象添加到段落中并将段落添加到包的主文档部件.
   *
   * @param wordFilePath 文件路径
   * @param imageList 图片对象集合（ 图片对象属性： url            图片文件路径 keyword     文档中的图片占位符 name        图片文件名 ）
   * @throws Exception 不幸的createImageInline方法抛出一个异常(没有更多具体的异常类型)
   */
  public static void addImageToPackage(String wordFilePath,
      List<JSONObject> imageList) throws Exception {
    addImageToPackage(new File(wordFilePath), imageList);
  }

  /**
   * 创建一个对象工厂并用它创建一个段落和一个可运行块R. 然后将可运行块添加到段落中. 接下来创建一个图画并将其添加到可运行块R中. 最后我们将内联 对象添加到图画中并返回段落对象.
   *
   * @param inline 包含图片的内联对象.
   * @return 包含图片的段落
   */
  private static P addInlineImageToParagraph(Inline inline) {
    // 添加内联对象到一个段落中
    P paragraph = factory.createP();
    R run = factory.createR();
    paragraph.getContent().add(run);
    Drawing drawing = factory.createDrawing();
    run.getContent().add(drawing);
    drawing.getAnchorOrInline().add(inline);
    return paragraph;
  }

  /**
   * 文档结尾添加一个空白页
   */
  public static void addPageBreak(File word) throws Docx4JException {

    WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);

    MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

    Br breakObj = new Br();
    breakObj.setType(STBrType.PAGE);

    P paragraph = factory.createP();
    paragraph.getContent().add(breakObj);
    documentPart.getJaxbElement().getBody().getContent().add(paragraph);
    wordMLPackage.save(word);
  }

  /**
   * 文档结尾添加一个空白页
   */
  public static void addPageBreak(MainDocumentPart documentPart) {
    Br breakObj = new Br();
    breakObj.setType(STBrType.PAGE);

    P paragraph = factory.createP();

    paragraph.getContent().add(breakObj);

    documentPart.getJaxbElement().getBody().getContent().add(paragraph);
  }

  /**
   * 文档结尾添加一个空白页
   */
  public static void addPageBreak(String wordFilePath) throws Docx4JException {
    addPageBreak(new File(wordFilePath));
  }


  public static void main(String[] args) throws Exception {
    File file1 = new File("C:\\Users\\Administrator\\Desktop\\contactA123.docx");
    File file2 = new File("C:\\Users\\Administrator\\Desktop\\contactA123.docx");
    List<File> files = new ArrayList<File>();
    files.add(file1);
    files.add(file2);
    DocxUtil.mergeWordML(files, "C:\\Users\\Administrator\\Desktop\\contact_all.docx");
  }

  /**
   * 替换段落里面的变量
   *
   * @param doc 要替换的文档
   * @param params 参数
   */
  public static void replaceInPara(XWPFDocument doc, Map<String, Object> params) {
    Iterator<XWPFParagraph> iterator = doc.getParagraphsIterator();
    XWPFParagraph para;
    while (iterator.hasNext()) {
      para = iterator.next();
      replaceInPara(para, params);
    }
  }

  /**
   * 替换段落里面的变量
   *
   * @param para 要替换的段落
   * @param params 参数
   */
  public static void replaceInPara(XWPFParagraph para, Map<String, Object> params) {
    List<XWPFRun> runs;
    Matcher matcher;
    if (matcher(para.getParagraphText()).find()) {
      runs = para.getRuns();
      for (int i = 0; i < runs.size(); i++) {
        XWPFRun run = runs.get(i);
        String runText = run.toString();
        matcher = matcher(runText);
        if (matcher.find()) {
          while ((matcher = matcher(runText)).find()) {
            runText = matcher.replaceFirst(String.valueOf(params.get(matcher.group(1))));
          }
          //直接调用XWPFRun的setText()方法设置文本时，在底层会重新创建一个XWPFRun，把文本附加在当前文本后面，
          //所以我们不能直接设值，需要先删除当前run,然后再自己手动插入一个新的run。
          //para.removeRun(i);
          //para.insertNewRun(i).setText(runText);
          run.setText(runText, 0);
        }
      }
    }
  }

  /**
   * 替换表格里面的变量
   *
   * @param doc 要替换的文档
   * @param params 参数
   */
  public static void replaceInTable(XWPFDocument doc, Map<String, Object> params) {
    Iterator<XWPFTable> iterator = doc.getTablesIterator();
    XWPFTable table;
    List<XWPFTableRow> rows;
    List<XWPFTableCell> cells;
    List<XWPFParagraph> paras;
    while (iterator.hasNext()) {
      table = iterator.next();
      rows = table.getRows();
      for (XWPFTableRow row : rows) {
        cells = row.getTableCells();
        for (XWPFTableCell cell : cells) {
          paras = cell.getParagraphs();
          for (XWPFParagraph para : paras) {
            replaceInPara(para, params);
          }
        }
      }
    }
  }

  /**
   * 正则匹配字符串
   */
  public static Matcher matcher(String str) {
    Pattern pattern = Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(str);
    return matcher;
  }
}
java工具类实现word文档读取字段的赋值

猜你喜欢