入门例子
maven 引入
引入基本的 maven 依赖。
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<!-- Lucene core library -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.2.1</version>
</dependency>
<!-- Lucene query parser library -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.2.1</version>
</dependency>
<!-- Additional Lucene analyzers -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.2.1</version>
</dependency>
<!-- JUnit 4; NOTE(review): no <scope> is declared, so it is on the compile classpath - confirm whether test scope was intended -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
</dependencies>
创建索引
示例代码如下:
package com.github.houbb.lucene.learn;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one document with three fields: {@code contents} (the tokenized
 * file body), {@code fileName} and {@code fullPath} (both stored).
 *
 * @author binbin.hou
 * @date 2018/11/15 11:24
 */
public class Indexer {

    /**
     * Directory holding the index files; kept so it can be released in {@link #close()}.
     */
    private final Directory directory;

    /**
     * Writer used to add documents to the index.
     */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index located at {@code indexDir} and prepares a writer for it.
     *
     * @param indexDir file-system path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter indexWriter;
        try {
            // Standard analyzer for tokenizing text fields.
            Analyzer analyzer = new StandardAnalyzer();
            indexWriter = new IndexWriter(dir, new IndexWriterConfig(analyzer));
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if closing either resource fails
     */
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Sub-directories are skipped rather than aborting the run.
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the document count reported by the writer ({@code writer.numDocs()})
     *         after all files have been added
     * @throws IOException if {@code dataDir} is not a readable directory
     * @throws Exception   if reading or indexing any file fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File file : files) {
            if (!file.isFile()) {
                // A FileReader cannot be opened on a directory.
                continue;
            }
            indexFile(file);
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index.
     *
     * @param f the file to index
     * @throws Exception if the file cannot be read or the document cannot be written
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件:" + f.getCanonicalPath());
        // Closing the reader here guarantees the file handle is released even if
        // addDocument fails before the contents are consumed.
        // NOTE(review): FileReader(File) decodes with the platform default charset on
        // older JDKs - confirm the data files match it.
        try (FileReader reader = new FileReader(f)) {
            writer.addDocument(getDocument(f, reader));
        }
    }

    /**
     * Builds the Lucene document describing one file.
     *
     * @param f        the file being indexed
     * @param contents reader over the file body; ownership stays with the caller
     * @return document with the {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws IOException if the canonical path cannot be resolved
     */
    private Document getDocument(File f, FileReader contents) throws IOException {
        Document doc = new Document();
        // File body: supplied as a Reader, so it is tokenized but not stored.
        doc.add(new TextField("contents", contents));
        // Field.Store.YES keeps the original value in the index so it can be read back from hits.
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Demo entry point: indexes a hard-coded data directory and prints the elapsed time.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Where the index is written.
        String indexDir = "D:\\lucene\\dataindex";
        // Where the files to be indexed live.
        String dataDir = "D:\\lucene\\data";
        Indexer indexer = null;
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        try {
            indexer = new Indexer(indexDir);
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The constructor may have failed, in which case there is nothing to close.
            if (indexer != null) {
                try {
                    indexer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        long end = System.currentTimeMillis();
        System.out.println("索引:" + numIndexed + " 个文件 花费了" + (end - start) + " 毫秒");
    }
}
使用索引
package com.github.houbb.lucene.learn;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a query against the {@code contents} field of an existing Lucene index and
 * prints the {@code fullPath} of each hit.
 *
 * @author binbin.hou
 * @date 2018/11/15 11:35
 */
public class Searcher {

    /**
     * Searches the index at {@code indexDir} for {@code q} and prints the top 10 hits.
     *
     * @param indexDir file-system path of the index directory
     * @param q        query string, parsed with the classic query parser against {@code contents}
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources releases the directory, reader and analyzer even when
        // parsing or searching throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // First argument: default field to search; second: analyzer applied to the query text.
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            // Second argument: maximum number of hits to return.
            TopDocs hits = searcher.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

            // Each ScoreDoc carries the internal id of a matching document.
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches a hard-coded index for the term {@code lucene}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene\\dataindex";
        // Text to search for.
        String q = "lucene";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
V9.0.0 例子
官方例子
// Set up an on-disk index in a fresh temporary directory.
Path indexPath = Files.createTempDirectory("tempIndex");
Directory directory = FSDirectory.open(indexPath);
Analyzer analyzer = new StandardAnalyzer();
IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));

// Index a single document whose only field is stored.
String text = "This is the text to be indexed.";
Document doc = new Document();
doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
iwriter.addDocument(doc);
iwriter.close();

// Now search the index:
DirectoryReader ireader = DirectoryReader.open(directory);
IndexSearcher isearcher = new IndexSearcher(ireader);

// Parse a simple query that searches for "text":
Query query = new QueryParser("fieldname", analyzer).parse("text");
ScoreDoc[] hits = isearcher.search(query, 10).scoreDocs;
assertEquals(1, hits.length);

// Iterate through the results:
for (ScoreDoc hit : hits) {
    Document hitDoc = isearcher.doc(hit.doc);
    assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
}

// Release the reader and directory, then delete the temporary index.
ireader.close();
directory.close();
IOUtils.rm(indexPath);
# 参考资料
https://lucene.apache.org/core/9_0_0/core/index.html
https://lucene.apache.org/core/9_0_0/index.html