类别

全部

MQ	VUE	Git	POI	J2SE	HTML	IDEA	JSON	Flume
Storm	Linux	MyCat	Dubbo	MySQL	JSOUP	Layui	Shiro	Nginx
easyui	Lucene	网络	Tomcat	python	Spring	Docker	Crawler	CentOS7
Windows	HTMLUnit	Exception	HTMLClient	JavaScript	Springboot	WebService	Java 书籍	SpringCloud
三方登录	微信支付	Elasticsearch	SpringSecurity	Spring Data JPA	CAS单点登录	富文本编辑器	支付宝第三方支付

Lucene 查询索引

发表于 2024-04-06 22:47:31 阅读(191) 博客类别：Lucene

Lucene 查询索引

Lucene 常用查询器 介绍和基本使用
    TermQuery 精准匹配 一般不推荐使用
    QueryParser 查询解析器  可以实现 模糊匹配，精准匹配，多词查询时的各种组合
    	多词查询组合：
    		apple~  表示只要包含apple的就都查询出来
    		apple orange 表示包含apple或者orange的都查询出来
    		apple AND orange 表示同时包含apple和orange的才会查询出来

创建索引

// 对指定目录下的文件生成索引
    public static void main(String[] args) throws Exception{
        // 1. 获取索引存放的路径
        Directory dir = FSDirectory.open(Paths.get("D://lucene"));
        // 2. 创建写索引对象
        IndexWriter writer = new IndexWriter(dir,new IndexWriterConfig(new StandardAnalyzer()));
        // 3. 遍历原始数据 然后对每个文件中的数据进行写索引
        File[] files = new File("D://lucene//data").listFiles();
        for (File file:files){
            // 4. 创建文档对象，每条数据就是一个文档对象
            Document doc = new Document();
            // 5. 添加需要分词的字段
            doc.add(new TextField("contents",new FileReader(file)));
            doc.add(new TextField("fileName", file.getName(), Field.Store.YES));
            doc.add(new TextField("fullPath",file.getCanonicalPath(),Field.Store.YES));
            // 执行写索引
            writer.addDocument(doc);
        }
        System.out.println("执行了 "+writer.numRamDocs()+" 个文件");
        writer.close();
    }

TermQuery

package com.et.com.et.lucene03;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;

/**
 * @Author: ETJAVA
 * @CreateTime: 2024-04-06  22:36
 * @Description: TODO 精准查询
 * @Version: 1.0
 */
public class TestTermQuery {

    // 索引所在的目录
    private static String INDEX_DIR="D://lucene";
    /**
     * TermQuery方式 查询指定的内容【精准匹配 一般不适用】
     */
    private static void searchByTermQuery() throws Exception {
        // 获取索引存放的路径
        Directory dir = FSDirectory.open(Paths.get(INDEX_DIR));
        // 创建IndexReader对象 用来读取索引信息
        IndexReader reader = DirectoryReader.open(dir);
        // 创建查询器
        IndexSearcher is = new IndexSearcher(reader);
        // 定义查询内容
        String q = "builder";
        // 封装要查询的字段和内容
        Term term = new Term("contents",q);
        // 定义查询器TermQuery  精准匹配
        Query query = new TermQuery(term);
        // 执行查询 返回TopDocs
        TopDocs topDocs = is.search(query, 10);
        System.out.println("查询 "+q+" 共查询到 "+topDocs.totalHits.value+" 个文档");
        // 遍历结果集 获取需要的信息
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // 封装获取的内容 scoreDoc.doc返回文档ID
            Document doc = is.doc(scoreDoc.doc);
            System.out.println("文档路径："+doc.get("fullPath"));
            System.out.println("文档名称："+doc.get("fileName"));
        }
        reader.close();
    }

    public static void main(String[] args) throws Exception {
        searchByTermQuery();
    }
}

QueryParser

package com.et.com.et.lucene03;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;

/**
 * @Author: ETJAVA
 * @CreateTime: 2024-04-06  22:40
 * @Description: TODO
 * @Version: 1.0
 */
public class TestQueryParser {

    // 索引所在的目录
    private static String INDEX_DIR="D://lucene";
    /*
     * QueryParse方式查询
     * 匹配度可以自定义
     * */
    private static void searchByQueryParse() throws Exception {
        // 获取索引存放的路径
        Directory dir = FSDirectory.open(Paths.get(INDEX_DIR));
        // 创建IndexReader对象 用来读取索引信息 相当于FileReader(new File("xxx"))
        IndexReader reader = DirectoryReader.open(dir);
        // 创建查询器
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // 创建分词器
        Analyzer analyzer = new StandardAnalyzer();
        // 创建查询解析器 封装查询的字段和指定分词器
        QueryParser queryParser = new QueryParser("contents",analyzer);
        // 定义要查询的内容
        String q = "builde22r~ spring";
        /*
            解析查询 - 封装要查询的内容
            多个单词之间 如果使用或者的关系查询 直接空格隔开即可 或 a or b 使用or隔开
            如果是并且的关系 使用大写的AND进行关联即可
            如果是查询相似的单词 使用~ 取反符号
         */
        Query query = queryParser.parse(q);
        /*
            执行查询 并指定返回的最大条数
            如果数据量较大 都放内存分页是不现实的，可以借助其他缓存插件实现
            如果数据量适中 每次查询出来的数据 然后在内存中进行分页即可
         */
        TopDocs topDocs = indexSearcher.search(query, 10);
        // 遍历需要的信息
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            System.out.println("文档路径："+doc.get("fullPath"));
            System.out.println("文档名称："+doc.get("fileName"));
        }
        reader.close();
    }

    public static void main(String[] args) throws Exception {
        searchByQueryParse();
    }
}

上一篇： Lucene 文档域加权

下一篇：Lucene 代码高亮显示