从段落中查找与目标关键词最相似的关键词,并返回匹配到的关键词信息

代码如下

/**
    * Searches a passage for the text that most closely resembles a target keyword and
    * reports where and how well it matched.
    *
    * <p>For every start position in {@code strSource} the method greedily collects source
    * characters that also occur, in the same order, in {@code strCompared}. A candidate is
    * abandoned once more than {@code matchRangeIndex} non-matching source characters follow
    * the previous matched character. The candidates with the highest number of matched
    * characters are kept; for each of them the slice of the source that the whole target
    * would cover when aligned on the matched characters is stored as its source string
    * (clipped to the bounds of the source).
    *
    * <p>The score is {@code round(100 * matchedCharacters / strCompared.length())}; it is
    * {@code 0} when the target is empty. {@code null} arguments are treated as empty text.
    *
    * @param strSource the passage to search in
    * @param strCompared the target keyword (the reference string, i.e. the divisor of the score)
    * @param level score threshold; the result is flagged as similar when the score is
    *              greater than or equal to this value
    * @param matchRangeIndex maximum number of non-matching source characters tolerated
    *              between two consecutive matched characters
    * @return a {@code SimilarInfo} carrying both input strings, the score, the similarity
    *         flag and, when at least one character matched, the list of best matches
    */
   public static SimilarInfo getSimilarDetailsInfo(String strSource, String strCompared, int level, int matchRangeIndex){
      SimilarInfo similarInfo=new SimilarInfo();
      similarInfo.setStrCompared(strCompared);
      similarInfo.setStrSource(strSource);
      // Null is handled like empty text so the caller gets a "no match" result instead of a bare NPE.
      String source=strSource==null?"":strSource;
      String target=strCompared==null?"":strCompared;
      int sourceCount=source.length();
      int targetCount=target.length();
      // Candidates grouped by the number of characters they matched.
      HashMap<Integer,List<MatchInfo>> mapMatches=new HashMap<>();
      int maxSameCount=0;
      // Try every source position as the start of a candidate.
      for(int i=0;i<sourceCount;i++){
         StringBuilder matched=new StringBuilder();
         int nowSameCount=0;
         int targetCursor=0;// next target index that may still be matched
         int lastSrcIndex=0;// source index of the most recent match
         int targetStartIndex=0;
         int targetEndIndex=0;
         int sourceStartIndex=0;
         int sourceEndIndex=0;
         for(int j=i;j<sourceCount;j++){
            // Nothing left in the target to match against; scanning further cannot change the result.
            if(targetCursor>=targetCount){
               break;
            }
            // Stop once the gap since the last match exceeds the allowed range.
            // Written as a subtraction so a very large matchRangeIndex cannot overflow.
            if(nowSameCount>0&&j-lastSrcIndex-1>matchRangeIndex){
               break;
            }
            char charSource=source.charAt(j);
            // First occurrence of this character in the not-yet-consumed part of the target.
            int hit=target.indexOf(charSource,targetCursor);
            if(hit<0){
               continue;
            }
            if(nowSameCount==0){
               // Remember where the first matched character sits in both strings.
               targetStartIndex=hit;
               sourceStartIndex=j;
            }
            // Always track the most recent matched character.
            targetEndIndex=hit;
            sourceEndIndex=j;
            lastSrcIndex=j;
            matched.append(charSource);
            nowSameCount++;
            targetCursor=hit+1;
         }
         if(nowSameCount>maxSameCount){
            maxSameCount=nowSameCount;
         }
         String strMatches=matched.toString();
         if(isEmpty(strMatches)){
            continue;
         }
         List<MatchInfo> list=mapMatches.get(nowSameCount);
         if(list==null){
            list=new ArrayList<>();
            mapMatches.put(nowSameCount,list);
         }
         MatchInfo matchInfo=new MatchInfo();
         matchInfo.setMatchStr(strMatches);
         matchInfo.setTargetStartIndex(targetStartIndex);
         matchInfo.setTargetEndIndex(targetEndIndex);
         matchInfo.setSourceStartIndex(sourceStartIndex);
         matchInfo.setSourceEndIndex(sourceEndIndex);
         // Different start positions can produce the very same match; keep only one copy.
         boolean isHas=false;
         for(MatchInfo item : list){
            if(item.getMatchStr().equals(matchInfo.getMatchStr())
                  &&item.getTargetStartIndex().equals(matchInfo.getTargetStartIndex())
                  &&item.getTargetEndIndex().equals(matchInfo.getTargetEndIndex())
                  &&item.getSourceStartIndex().equals(matchInfo.getSourceStartIndex())
                  &&item.getSourceEndIndex().equals(matchInfo.getSourceEndIndex())){
               isHas=true;
               break;
            }
         }
         if(!isHas){
            list.add(matchInfo);
         }
      }
      List<MatchInfo> bestMatches=mapMatches.get(maxSameCount);
      if(bestMatches!=null){
         for(MatchInfo item : bestMatches){
            // Extend the matched span to the window the full target would occupy in the source.
            int srcStartIndex=item.getSourceStartIndex()-item.getTargetStartIndex();
            int srcEndIndex=item.getSourceEndIndex()+(targetCount-1-item.getTargetEndIndex());
            // Clip both edges: the target may overhang the beginning or the end of the source.
            if(srcStartIndex<0){
               srcStartIndex=0;
            }
            if(srcEndIndex>sourceCount-1){
               srcEndIndex=sourceCount-1;
            }
            item.setSourceStr(source.substring(srcStartIndex,srcEndIndex+1));
         }
         similarInfo.setMatchInfo(bestMatches);
      }
      // Percentage of target characters that were matched; an empty target scores 0.
      int realLevel=targetCount==0?0:(int)Math.rint(100.0*maxSameCount/targetCount);
      boolean isSimilar=realLevel>=level;
      similarInfo.setRealScore(realLevel);
      similarInfo.setIsSimilar(isSimilar);
      return similarInfo;
   }

 
 
效果图如下:
发布了42 篇原创文章 · 获赞 25 · 访问量 7万+

猜你喜欢

转载自blog.csdn.net/qq812858143/article/details/79867673