Spring Boot 3 单体应用集成搜索引擎

Spring Boot 单体应用集成搜索引擎：Apache Lucene 的封装库 Whoosh-java 与中文分词器 IKAnalyzer

先集成IKAnalyzer（解决 Lucene 中文分词问题），再集成Whoosh-java（简化 Lucene 的索引 / 检索操作）

一、核心前提说明

IKAnalyzer：不是 Lucene 的完整封装，而是专注于中文分词的工具，弥补 Lucene 原生分词对中文支持差的问题，需配合 Lucene 核心使用。
Whoosh-java：是 Python Whoosh 的 Java 移植版，对 Lucene 做了轻量级封装，简化了索引创建、查询构建等底层操作，降低开发成本。
两者结合：Whoosh-java 简化 Lucene 使用，IKAnalyzer 提供中文分词能力，是嵌入式检索的高效组合。

二、完整集成步骤（Spring Boot 3.x）

1. 引入依赖（pom.xml）

首先在pom.xml中添加 Whoosh-java、Lucene 核心、IKAnalyzer 的依赖：

<!-- Whoosh-java (wrapper library around Lucene) -->
<dependency>
    <groupId>com.github.linguanghua</groupId>
    <artifactId>whoosh-java</artifactId>
    <version>0.1.5</version>
    <!-- Exclude the bundled Lucene artifacts so a single Lucene version is used -->
    <exclusions>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>*</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- Lucene core (the one Lucene version used by the whole application) -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>9.10.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>9.10.0</version>
</dependency>

<!-- IKAnalyzer (Chinese word segmentation) -->
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
    <!--
        Exclude every transitive Lucene artifact, not only lucene-core:
        this IKAnalyzer build targets Lucene 4.x, and any other old Lucene
        module it pulls in would otherwise sit on the classpath next to 9.10.0.
    -->
    <exclusions>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>*</artifactId>
        </exclusion>
    </exclusions>
</dependency>

2. 自定义 IK 分词器适配 Lucene 9.x

IKAnalyzer 2012_u6 是基于 Lucene 4.x 编译的，其自带的 IKAnalyzer / IKTokenizer 无法直接在 Lucene 9.x 上运行，因此需要基于其核心分词类自行封装一个兼容 Lucene 9.x 的 Analyzer：

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import org.wltea.analyzer.lucene.IKTokenizer;

/**
 * IK Chinese analyzer usable with Lucene 9.x.
 *
 * <p>The {@code IKTokenizer} shipped with IKAnalyzer 2012_u6 extends the Lucene 4.x
 * {@code Tokenizer(Reader)} constructor, which no longer exists in Lucene 9.x, so this
 * class supplies its own tokenizer that drives the IK segmentation core
 * ({@link IKSegmenter}) directly.
 */
public class IKAnalyzerLucene9 extends Analyzer {
    /** {@code true}: smart (coarse-grained) segmentation; {@code false}: fine-grained segmentation. */
    private final boolean useSmart;

    /** Creates an analyzer that uses smart segmentation. */
    public IKAnalyzerLucene9() {
        this(true);
    }

    /**
     * Creates an analyzer with the given segmentation mode.
     *
     * @param useSmart {@code true} for smart segmentation, {@code false} for fine-grained
     */
    public IKAnalyzerLucene9(boolean useSmart) {
        this.useSmart = useSmart;
    }

    /**
     * Builds the per-field analysis chain, which consists of the IK tokenizer only.
     *
     * @param fieldName name of the field being analyzed (not used)
     * @return components wrapping a new IK-backed tokenizer
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new IKSegmenterTokenizer(useSmart));
    }

    /**
     * Lucene 9.x tokenizer that emits the lexemes produced by {@link IKSegmenter}.
     * Declared {@code final} because Lucene requires token streams to be final classes
     * (or to have a final {@code incrementToken}).
     */
    private static final class IKSegmenterTokenizer extends Tokenizer {
        private final IKSegmenter segmenter;
        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
        private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
        private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
        /** End position of the last emitted lexeme; reported as the final offset in {@link #end()}. */
        private int lastEndPosition;

        IKSegmenterTokenizer(boolean useSmart) {
            super();
            // `input` is only a placeholder reader at construction time;
            // the real reader is handed to the segmenter in reset().
            this.segmenter = new IKSegmenter(input, useSmart);
        }

        @Override
        public boolean incrementToken() throws IOException {
            clearAttributes();
            Lexeme lexeme = segmenter.next();
            if (lexeme == null) {
                return false;
            }
            termAttribute.setEmpty().append(lexeme.getLexemeText());
            offsetAttribute.setOffset(
                    correctOffset(lexeme.getBeginPosition()),
                    correctOffset(lexeme.getEndPosition()));
            typeAttribute.setType(lexeme.getLexemeTypeString());
            lastEndPosition = lexeme.getEndPosition();
            return true;
        }

        @Override
        public void reset() throws IOException {
            // super.reset() makes the reader passed via setReader() visible as `input`.
            super.reset();
            lastEndPosition = 0;
            segmenter.reset(input);
        }

        @Override
        public void end() throws IOException {
            super.end();
            int finalOffset = correctOffset(lastEndPosition);
            offsetAttribute.setOffset(finalOffset, finalOffset);
        }
    }
}

3. 封装 Whoosh-java 检索服务

基于 Whoosh-java 封装索引创建、数据新增、检索的核心服务，结合 IK 分词器：

import org.apache.lucene.queryparser.classic.ParseException;
import org.springframework.stereotype.Service;
import whoosh.Document;
import whoosh.Index;
import whoosh.Query;
import whoosh.Whoosh;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * 基于Whoosh-java + IKAnalyzer的嵌入式检索服务
 */
@Service
public class WhooshSearchService {
    // 索引存储目录（内存索引：RAMDirectory，重启丢失；文件索引：FSDirectory）
    private final Index index;
    // 中文分词器
    private final IKAnalyzerLucene9 analyzer;

    /**
     * 初始化索引和分词器
     */
    public WhooshSearchService() throws IOException {
        // 1. 初始化IK分词器（智能分词模式）
        analyzer = new IKAnalyzerLucene9(true);

        // 2. 初始化索引（两种方式可选）
        // 方式1：内存索引（无需磁盘，适合测试/小数据量）
        index = Whoosh.createInMemory(analyzer);

        // 方式2：文件索引（持久化，适合生产）
        // String indexPath = "/tmp/whoosh-index"; // 自定义索引存储路径
        // index = Whoosh.create(indexPath, analyzer);

        // 3. 初始化测试数据（可选）
        initTestData();
    }

    /**
     * 初始化测试索引数据
     */
    private void initTestData() throws IOException {
        // 添加文档：字段名（name/category/price） + 字段值
        addDocument(Map.of(
                "id", "1",
                "name", "华为Mate60 Pro手机",
                "category", "数码产品",
                "price", "6999.0"
        ));
        addDocument(Map.of(
                "id", "2",
                "name", "苹果iPhone 15 Pro",
                "category", "数码产品",
                "price", "7999.0"
        ));
        addDocument(Map.of(
                "id", "3",
                "name", "小米笔记本Pro",
                "category", "数码产品",
                "price", "5999.0"
        ));
    }

    /**
     * 新增文档到索引
     * @param fieldMap 字段键值对（如：id、name、category等）
     */
    public void addDocument(Map<String, String> fieldMap) throws IOException {
        Document doc = new Document();
        // 遍历字段，添加到文档（TextField：分词存储，StringField：精确存储）
        fieldMap.forEach((field, value) -> {
            if ("name".equals(field)) {
                // 名称字段：分词存储（支持检索）
                doc.addTextField(field, value);
            } else {
                // 其他字段：精确存储（仅存储，不分词）
                doc.addStringField(field, value);
            }
        });
        // 写入索引
        index.addDocument(doc);
        // 提交索引（确保数据生效）
        index.commit();
    }

    /**
     * 检索文档
     * @param keyword 检索关键词
     * @param field 检索字段（如name）
     * @return 检索结果列表
     */
    public List<Map<String, String>> search(String keyword, String field) throws IOException, ParseException {
        // 1. 构建查询（检索指定字段包含关键词）
        Query query = Query.parse(field + ":" + keyword, analyzer);

        // 2. 执行查询（返回前10条）
        List<Document> docs = index.search(query, 10);

        // 3. 解析结果
        List<Map<String, String>> result = new ArrayList<>();
        for (Document doc : docs) {
            result.add(Map.of(
                    "id", doc.get("id"),
                    "name", doc.get("name"),
                    "category", doc.get("category"),
                    "price", doc.get("price")
            ));
        }
        return result;
    }

    /**
     * 关闭索引（应用关闭时调用）
     */
    public void close() throws IOException {
        index.close();
    }
}

4. 编写测试接口

创建 Controller 测试检索功能：

import org.apache.lucene.queryparser.classic.ParseException;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * REST endpoint exposing keyword search over the {@code name} field.
 */
@RestController
public class SearchController {
    private final WhooshSearchService whooshSearchService;

    public SearchController(WhooshSearchService whooshSearchService) {
        this.whooshSearchService = whooshSearchService;
    }

    /**
     * Searches the {@code name} field for the given keyword.
     *
     * @param keyword search keyword, e.g. 手机 or 苹果
     * @return matching documents as field/value maps
     * @throws RuntimeException if the search fails; the original exception is kept as the cause
     */
    @GetMapping("/search")
    public List<Map<String, String>> search(@RequestParam String keyword) {
        try {
            return whooshSearchService.search(keyword, "name");
        } catch (IOException | ParseException e) {
            // Keep the cause so the stack trace of the underlying failure is not lost.
            throw new RuntimeException("检索失败：" + e.getMessage(), e);
        }
    }
}

三、测试验证

启动 Spring Boot 应用；
访问接口：http://localhost:8080/search?keyword=手机；

预期返回结果：

[
    {
        "id": "1",
        "name": "华为Mate60 Pro手机",
        "category": "数码产品",
        "price": "6999.0"
    }
]

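注意：访问 http://localhost:8080/search?keyword=数码 并不会返回 3 条测试数据——该接口只检索 name 字段，而 3 条数据的 name 中都不包含“数码”，因此返回空列表 []。如需按分类检索，需要将 category 改为分词字段（addTextField），并使用“五、进阶扩展”中的多字段检索。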

四、关键说明

1. 索引类型选择

内存索引：Whoosh.createInMemory(analyzer)，无需磁盘，速度快，但应用重启后索引丢失，适合测试 / 临时数据；
文件索引：Whoosh.create(indexPath, analyzer)，索引持久化到磁盘，适合生产环境，需保证目录读写权限。

2. IK 分词器模式

智能分词（useSmart=true）：适合检索场景，分词粒度粗（如 “华为手机” 拆分为 “华为”“手机”）；
细粒度分词（useSmart=false）：适合精准匹配，分词粒度细（如 “华为手机” 拆分为 “华为”“手”“手机”）。

3. 字段类型

addTextField：分词存储，支持全文检索（如商品名称、描述）；
addStringField：精确存储，不分词（如 ID、分类、价格）。

五、进阶扩展

1. 索引更新 / 删除：

// Example 1 - delete: remove the document(s) matching id:1.
// NOTE(review): no commit follows this call in the snippet; presumably index.commit()
// is required before the deletion becomes visible - confirm.
index.deleteDocuments(Query.parse("id:1", analyzer));
// Example 2 - update: delete the old document first, then add the new version.
index.deleteDocuments(Query.parse("id:1", analyzer));
addDocument(Map.of("id", "1", "name", "华为Mate60 Pro+", "category", "数码产品", "price", "7999.0"));
// NOTE(review): addDocument(...) in the service above already calls index.commit(),
// so this extra commit looks redundant - confirm before removing.
index.commit();

2. 多字段检索：

// Match documents whose name OR category field contains the keyword.
// NOTE(review): keyword is concatenated into the query string unescaped, so query-syntax
// characters in user input will be interpreted as syntax - escape it first.
// NOTE(review): the service above indexes category with addStringField (not analyzed),
// so presumably only an exact category value can match here - confirm.
Query query = Query.parse("name:" + keyword + " OR category:" + keyword, analyzer);

总结

集成核心步骤：引入 Whoosh-java + Lucene + IKAnalyzer 依赖 → 适配 IK 分词器到 Lucene 9.x → 封装 Whoosh 检索服务（初始化索引 / 分词器、新增文档、检索） → 编写测试接口；
核心优势：无需部署第三方服务、基于内存 / 文件索引、中文分词友好、开发成本比原生 Lucene 低；
适用场景：小 / 中数据量（10 万条以内）、无需分布式检索、对部署复杂度敏感的小型 Spring Boot 项目。

注意：本文归作者所有，未经作者允许，不得转载

一、核心前提说明

二、完整集成步骤（Spring Boot 3.x）

1. 引入依赖（pom.xml）

2. 自定义 IK 分词器适配 Lucene 9.x

3. 封装 Whoosh-java 检索服务

4. 编写测试接口

三、测试验证

四、关键说明

1. 索引类型选择

2. IK 分词器模式

3. 字段类型

五、进阶扩展

总结

已有 0 条评论

热门阅读

MySql5.7安装教程

联通宽带无法访问github,怎么办？

CiteSpace入门（安装教程）

C# NUnit单元测试

JSP九大内置对象

【springboot shiro】（2）Spring Boot与Shiro框架简介

ORCL 导入导出（个人笔记版）

【springboot shiro】（6）thymeleaf和shiro标签整合使用

最新发布