Lucene: Introduction to Lucene (Part V-Query)

Summary:

    1) Execute precise query using TermQuery

    2) Execute fuzzy String type range query using TermRangeQuery

    3) Execute precise Numeric type range query using NumericRangeQuery

1. We can use TermQuery to execute precise query.

2. Example as below:

    1) Main Function

package edu.xmu.lucene.Lucene_ModuleOne;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class SearcherUtil
{
	private Directory directory;
	private IndexReader reader;

	public SearcherUtil()
	{
		directory = new RAMDirectory();
	}

	public IndexSearcher getSearcher()
	{
		try
		{
			if (null == reader)
			{
				reader = IndexReader.open(directory);
			} else
			{
				IndexReader tempReader = IndexReader.openIfChanged(reader);
				if (null != tempReader)
				{
					reader.close();
					reader = tempReader;
				}
			}
		} catch (CorruptIndexException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		}
		return new IndexSearcher(reader);
	}

	public void buildIndex(List<Student> studentList)
	{
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
				new SimpleAnalyzer(Version.LUCENE_35));
		IndexWriter writer = null;
		Document doc = null;
		try
		{
			writer = new IndexWriter(directory, config);
			for (Student student : studentList)
			{
				doc = new Document();
				doc.add(new Field("id", student.getId(), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				doc.add(new Field("name", student.getName(), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				doc.add(new Field("password", student.getPassword(),
						Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("gender", student.getGender(),
						Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new NumericField("score", Field.Store.YES, true)
						.setIntValue(student.getScore()));

				writer.addDocument(doc);
			}
		} catch (CorruptIndexException e)
		{
			e.printStackTrace();
		} catch (LockObtainFailedException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		} finally
		{
			try
			{
				writer.close();
			} catch (CorruptIndexException e)
			{
				e.printStackTrace();
			} catch (IOException e)
			{
				e.printStackTrace();
			}
		}

	}

	/**
	 * Precise query using TermQuery
	 * 
	 * @param field
	 * @param name
	 */
	public void searchByTerm(String fieldName, String fieldValue)
	{
		IndexSearcher searcher = getSearcher();
		Query query = new TermQuery(new Term(fieldName, fieldValue));

		try
		{
			TopDocs tds = searcher.search(query, 100);
			System.out.println("Total Hits: " + tds.totalHits);

			for (ScoreDoc sd : tds.scoreDocs)
			{
				Document document = searcher.doc(sd.doc);

				System.out.println("id = " + document.get("id") + ", name = "
						+ document.get("name") + ", password = "
						+ document.get("password") + ", gender = "
						+ document.get("gender") + ", score = "
						+ document.get("score"));
			}
		} catch (IOException e)
		{
			e.printStackTrace();
		} finally
  		{
  		 try
  		 {
    			searcher.close();
   		 } catch (IOException e)
   		 {
    		  	e.printStackTrace();
   		 }
  	        }
	}
}

    2) Test Case

package edu.xmu.lucene.Lucene_ModuleOne;

import java.util.ArrayList;
import java.util.List;

import org.junit.Before;
import org.junit.Test;

/**
 * Exercises {@code SearcherUtil.searchByTerm} against a small in-memory index.
 */
public class SearcherUtilTest
{
	private SearcherUtil searcherUtil = null;

	/** Creates a fresh {@code SearcherUtil} before every test. */
	@Before
	public void setUp()
	{
		searcherUtil = new SearcherUtil();
	}

	/**
	 * Indexes the five sample students used by the test below. Not a test
	 * itself; it is called from the test method.
	 */
	private void testBuildIndex()
	{
		List<Student> students = new ArrayList<Student>();
		students.add(new Student("1", "Davy", "Jones", "Male", 100));
		students.add(new Student("1", "Davy", "Jones", "Male", 110));
		students.add(new Student("2", "Jones", "Davy", "Male", 120));
		students.add(new Student("3", "Calyp", "Jones", "Female", 130));
		students.add(new Student("4", "Pso", "Caly", "Female", 140));

		searcherUtil.buildIndex(students);
	}

	/** Builds the index, then runs an exact-match query on the gender field. */
	@Test
	public void testSearch()
	{
		testBuildIndex();

		final String field = "gender";
		final String value = "Female";
		searcherUtil.searchByTerm(field, value);
	}
}

    3) Console Output

Total Hits: 2
id = 3, name = Calyp, password = Jones, gender = Female, score = 130
id = 4, name = Pso, password = Caly, gender = Female, score = 140

    Comments:

        1) When we execute query using searchByTerm("gender", "Fema");

            The result set is empty because there is no document whose gender value equals "Fema". This is a precise (exact-match) query.

            The difference between a precise query and a fuzzy query is that, if we executed the query above as a fuzzy query, the size of the result set would be 2, because "Female" contains "Fema".

3. We can use TermRangeQuery to execute range query

4. Example as below

    1) Main Function

	/**
	 * Runs a String range query with TermRangeQuery and prints every hit.
	 * Both ends of the range are inclusive.
	 * 
	 * @param fieldName
	 *            field to search
	 * @param fieldValueStart
	 *            lower term of the range
	 * @param fieldValueEnd
	 *            upper term of the range
	 * @param resultSize
	 *            maximum number of hits to fetch
	 */
	public void searchByTermRange(String fieldName, String fieldValueStart,
			String fieldValueEnd, int resultSize)
	{
		final boolean includeLower = true;
		final boolean includeUpper = true;

		IndexSearcher indexSearcher = getSearcher();
		Query rangeQuery = new TermRangeQuery(fieldName, fieldValueStart,
				fieldValueEnd, includeLower, includeUpper);
		try
		{
			ScoreDoc[] hits = indexSearcher.search(rangeQuery, resultSize).scoreDocs;
			for (ScoreDoc hit : hits)
			{
				Document found = indexSearcher.doc(hit.doc);

				StringBuilder line = new StringBuilder();
				line.append("id = ").append(found.get("id"));
				line.append(", name = ").append(found.get("name"));
				line.append(", password = ").append(found.get("password"));
				line.append(", gender = ").append(found.get("gender"));
				line.append(", score = ").append(found.get("score"));
				System.out.println(line.toString());
			}
		} catch (IOException e)
		{
			e.printStackTrace();
		} finally
		{
			try
			{
				indexSearcher.close();
			} catch (IOException e)
			{
				e.printStackTrace();
			}
		}
	}

   2) Test Case

/**
 * Exercises {@code SearcherUtil.searchByTermRange} against a small in-memory
 * index whose ids are the single-digit strings "1" to "5".
 */
public class SearcherUtilTest
{
	private SearcherUtil searcherUtil = null;

	/** Creates a fresh {@code SearcherUtil} before every test. */
	@Before
	public void setUp()
	{
		searcherUtil = new SearcherUtil();
	}

	/**
	 * Indexes the five sample students used by the test below. Not a test
	 * itself; it is called from the test method.
	 */
	private void testBuildIndex()
	{
		List<Student> students = new ArrayList<Student>();
		students.add(new Student("1", "Davy", "Jones", "Male", 100));
		students.add(new Student("2", "Davy", "Jones", "Male", 110));
		students.add(new Student("3", "Jones", "Davy", "Male", 120));
		students.add(new Student("4", "Calyp", "Jones", "Female", 130));
		students.add(new Student("5", "Pso", "Caly", "Female", 140));

		searcherUtil.buildIndex(students);
	}

	/** Builds the index, then queries ids in the String range "1" to "3". */
	@Test
	public void testSearchByTermRange()
	{
		testBuildIndex();

		final String lowerId = "1";
		final String upperId = "3";
		final int maxResults = 100;
		searcherUtil.searchByTermRange("id", lowerId, upperId, maxResults);
	}
}

   3) Console Output

id = 1, name = Davy, password = Jones, gender = Male, score = 100
id = 2, name = Davy, password = Jones, gender = Male, score = 110
id = 3, name = Jones, password = Davy, gender = Male, score = 120

    Comments:

    1) This is a fuzzy query, not a precise query.

    2) When we rebuild index as below

	/**
	 * Indexes five sample students whose ids are the two-digit strings "11"
	 * to "55".
	 */
	private void testBuildIndex()
	{
		List<Student> students = new ArrayList<Student>();
		students.add(new Student("11", "Davy", "Jones", "Male", 100));
		students.add(new Student("22", "Davy", "Jones", "Male", 110));
		students.add(new Student("33", "Jones", "Davy", "Male", 120));
		students.add(new Student("44", "Calyp", "Jones", "Female", 130));
		students.add(new Student("55", "Pso", "Caly", "Female", 140));

		searcherUtil.buildIndex(students);
	}

       And execute the same query, the output is as below

id = 11, name = Davy, password = Jones, gender = Male, score = 100
id = 22, name = Davy, password = Jones, gender = Male, score = 110

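      This is because the ids are compared as Strings (lexicographically): "1" < "11" < "2" < "22" < "3" < "33", and the range is "1" <= id <= "3". So "11" and "22" fall inside the range, while "33" (and "44", "55") fall outside it.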

    3) How can we query the students whose score is in the range of 100<= value <= 120? --> This is about int value query not String value.

        1) Attempt-01: Execute query for int value just like String

	/**
	 * Attempt-01: queries the numeric score field with String range bounds.
	 */
	@Test
	public void testSearchByTermRange()
	{
		testBuildIndex();

		final String lowerScore = "100";
		final String upperScore = "120";
		final int maxResults = 100;
		searcherUtil.searchByTermRange("score", lowerScore, upperScore,
				maxResults);
	}

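           Output --> Empty (the score is indexed as a NumericField, whose terms are stored in an encoded numeric form rather than as plain text, so a String-based TermRangeQuery finds no matching terms)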

           Attempt-01: Failed!

       2) Attempt-02: Execute query for int value using NumericRangeQuery --> Please pay attention to this as it is precise range query.

	/**
	 * Runs an int range query with NumericRangeQuery and prints every hit.
	 * Both ends of the range are inclusive.
	 * 
	 * @param fieldName
	 *            numeric field to search
	 * @param fieldValueStart
	 *            lower bound of the range
	 * @param fieldValueEnd
	 *            upper bound of the range
	 * @param resultSize
	 *            maximum number of hits to fetch
	 */
	public void searchByNumericRange(String fieldName, int fieldValueStart,
			int fieldValueEnd, int resultSize)
	{
		IndexSearcher searcher = getSearcher();

		Query query = NumericRangeQuery.newIntRange(fieldName, fieldValueStart,
				fieldValueEnd, true, true);

		try
		{
			TopDocs tds = searcher.search(query, resultSize);
			for (ScoreDoc scoreDoc : tds.scoreDocs)
			{
				Document document = searcher.doc(scoreDoc.doc);

				System.out.println("id = " + document.get("id") + ", name = "
						+ document.get("name") + ", password = "
						+ document.get("password") + ", gender = "
						+ document.get("gender") + ", score = "
						+ document.get("score"));
			}
		} catch (IOException e)
		{
			e.printStackTrace();
		} finally
		{
			// Release the searcher on every path, as the other search
			// methods of this class do.
			try
			{
				searcher.close();
			} catch (IOException e)
			{
				e.printStackTrace();
			}
		}
	}
	/**
	 * Attempt-02: queries the numeric score field with int range bounds.
	 */
	@Test
	public void testSearchByNumericRange()
	{
		testBuildIndex();

		final int lowerScore = 100;
		final int upperScore = 120;
		final int maxResults = 100;
		searcherUtil.searchByNumericRange("score", lowerScore, upperScore,
				maxResults);
	}
id = 11, name = Davy, password = Jones, gender = Male, score = 100
id = 22, name = Davy, password = Jones, gender = Male, score = 110
id = 33, name = Jones, password = Davy, gender = Male, score = 120

猜你喜欢

转载自davyjones2010.iteye.com/blog/1874929