入门例子

maven 引入

引入基本的 Maven 依赖。

  [xml]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
    <!-- Lucene core library -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>7.2.1</version>
    </dependency>

    <!-- Lucene query parser -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>7.2.1</version>
    </dependency>

    <!-- Additional Lucene analyzers -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>7.2.1</version>
    </dependency>

    <!-- Unit testing -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.10</version>
    </dependency>
</dependencies>

创建索引

示例代码如下:

  [java]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package com.github.houbb.lucene.learn; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * @author binbin.hou * @date 2018/11/15 11:24 */ public class Indexer { /** * 写索引实例 */ private IndexWriter writer; /** * 构造方法 实例化IndexWriter * * @param indexDir * @throws IOException */ public Indexer(String indexDir) throws IOException { //得到索引所在目录的路径 Directory directory = FSDirectory.open(Paths.get(indexDir)); // 标准分词器 Analyzer analyzer = new StandardAnalyzer(); //保存用于创建IndexWriter的所有配置。 IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); //实例化IndexWriter writer = new IndexWriter(directory, iwConfig); } /** * 关闭写索引 * * @return 索引了多少个文件 * @throws Exception */ public void close() throws IOException { writer.close(); } public int index(String dataDir) throws Exception { File[] files = new File(dataDir).listFiles(); for (File file : files) { //索引指定文件 indexFile(file); } //返回索引了多少个文件 return writer.numDocs(); } /** * 索引指定文件 * * @param f */ private void indexFile(File f) throws Exception { //输出索引文件的路径 System.out.println("索引文件:" + f.getCanonicalPath()); //获取文档,文档里再设置每个字段 Document doc = getDocument(f); //开始写入,就是把文档写进了索引文件里去了; writer.addDocument(doc); } /** * 获取文档,文档里再设置每个字段 * * @param f * @return document */ private Document getDocument(File f) throws Exception { Document doc = new Document(); //把设置好的索引加到Document里,以便在确定被索引文档 doc.add(new TextField("contents", new FileReader(f))); //Field.Store.YES:把文件名存索引文件里,为NO就说明不需要加到索引文件里去 doc.add(new TextField("fileName", f.getName(), Field.Store.YES)); //把完整路径存在索引文件里 
doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES)); return doc; } public static void main(String[] args) { //索引指定的文档路径 String indexDir = "D:\\lucene\\dataindex"; ////被索引数据的路径 String dataDir = "D:\\lucene\\data"; Indexer indexer = null; int numIndexed = 0; //索引开始时间 long start = System.currentTimeMillis(); try { indexer = new Indexer(indexDir); numIndexed = indexer.index(dataDir); } catch (Exception e) { e.printStackTrace(); } finally { try { indexer.close(); } catch (Exception e) { e.printStackTrace(); } } //索引结束时间 long end = System.currentTimeMillis(); System.out.println("索引:" + numIndexed + " 个文件 花费了" + (end - start) + " 毫秒"); } }

使用索引

  [java]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package com.github.houbb.lucene.learn; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * @author binbin.hou * @date 2018/11/15 11:35 */ public class Searcher { public static void search(String indexDir, String q) throws Exception { // 得到读取索引文件的路径 Directory dir = FSDirectory.open(Paths.get(indexDir)); // 通过dir得到的路径下的所有的文件 IndexReader reader = DirectoryReader.open(dir); // 建立索引查询器 IndexSearcher is = new IndexSearcher(reader); // 实例化分析器 Analyzer analyzer = new StandardAnalyzer(); // 建立查询解析器 /** * 第一个参数是要查询的字段; 第二个参数是分析器Analyzer */ QueryParser parser = new QueryParser("contents", analyzer); // 根据传进来的p查找 Query query = parser.parse(q); // 计算索引开始时间 long start = System.currentTimeMillis(); // 开始查询 /** * 第一个参数是通过传过来的参数来查找得到的query; 第二个参数是要出查询的行数 */ TopDocs hits = is.search(query, 10); // 计算索引结束时间 long end = System.currentTimeMillis(); System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录"); // 遍历hits.scoreDocs,得到scoreDoc /** * ScoreDoc:得分文档,即得到文档 scoreDocs:代表的是topDocs这个文档数组 * * @throws Exception */ for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } // 关闭reader reader.close(); } public static void main(String[] args) { String indexDir = "D:\\lucene\\dataindex"; //我们要搜索的内容 String q = "lucene"; try { search(indexDir, q); } catch (Exception e) { e.printStackTrace(); } } }

V9.0.0 例子

官方例子

  [java]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Index one document into a temporary on-disk index.
Analyzer analyzer = new StandardAnalyzer();
Path indexPath = Files.createTempDirectory("tempIndex");
Directory directory = FSDirectory.open(indexPath);

IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
String text = "This is the text to be indexed.";
Document document = new Document();
document.add(new Field("fieldname", text, TextField.TYPE_STORED));
indexWriter.addDocument(document);
indexWriter.close();

// Now search the index:
DirectoryReader indexReader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(indexReader);

// Parse a simple query that searches for "text":
Query query = new QueryParser("fieldname", analyzer).parse("text");
ScoreDoc[] hits = searcher.search(query, 10).scoreDocs;
assertEquals(1, hits.length);

// Iterate through the results:
for (ScoreDoc hit : hits) {
    Document hitDoc = searcher.doc(hit.doc);
    assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
}

// Release the reader and directory, then delete the temporary index.
indexReader.close();
directory.close();
IOUtils.rm(indexPath);

#

参考资料

https://lucene.apache.org/core/9_0_0/core/index.html

https://lucene.apache.org/core/9_0_0/index.html

Lucene开发环境搭建-Maven

Lucene介绍与入门使用

LUCENE入门案例

技术实现细节

Lucene概述

Elasticsearch内核剖析