nineloong's blog

题目描述

LeetCode 3093 — 最长公共后缀查询 (Hard)

给定两个字符串数组 wordsContainer 和 wordsQuery。对每个 wordsQuery[i]，从 wordsContainer 中找出一个与它有最长公共后缀的字符串。如果多个字符串的公共后缀长度相同，取其中长度最短的那个；若仍然相同，取下标最小的那个。

返回答案数组 ans，其中 ans[i] 是匹配到的字符串在 wordsContainer 中的下标。

示例 1：

输入：wordsContainer = ["abcd","bcd","xbcd"], wordsQuery = ["cd","bcd","xyz"]
输出：[1,1,1]
解释：三个字符串都与 "cd" 共享后缀 "cd"，其中 "bcd" 最短，下标为 1。

示例 2：

输入：wordsContainer = ["abcdefgh","poiuygh","ghghgh"], wordsQuery = ["gh","acbfgh","acbfegh"]
输出：[2,0,2]
解释：三个字符串都共享后缀 "gh"，其中 "ghghgh" 最短，下标为 2。

约束：

1 <= wordsContainer.length, wordsQuery.length <= 10^4
1 <= wordsContainer[i].length, wordsQuery[i].length <= 5 * 10^3
所有字符串仅包含小写英文字母
wordsContainer 中所有字符串长度之和不超过 5 * 10^5
wordsQuery 中所有字符串长度之和不超过 5 * 10^5

求解思路

最直接的想法是对每个查询字符串，遍历 wordsContainer 逐个比较后缀。但 wordsContainer 和 wordsQuery 各有最多 10^4 个元素，每个字符串最长 5 * 10^3，暴力比较的代价太高。

回顾一下上一道字典树题目——3043 找的是公共前缀，直接把字符串正序插入 Trie 就行。这题变成了公共后缀，本质区别只在匹配方向：后缀匹配就是从字符串末尾往前比。如果把每个字符串反转，后缀问题就变成了前缀问题，字典树可以直接复用。

不过这题比 3043 多了一个要求：匹配不上最长后缀的时候，要返回 wordsContainer 中最短的字符串（下标最小优先）。这意味着每个查询可能根本走不到树的深处——一个很短的查询字符串，和一个很长的容器字符串，它们的公共后缀可能只有两三个字符，但那个长字符串仍然是最优答案，因为它在更浅的节点上就被记录了。

所以，不能只在 Trie 的叶子节点或特定深度上记答案，而是每个节点都要维护一个当前最优解：经过这个节点的所有字符串中，哪个最短（同等长度取最先插入的）。这样查询时，从根往深处走，只要下一个字符的子节点存在就继续走；一旦走不下去了，当前节点记录的就是答案——它代表的就是最长公共后缀所能到达的最深位置。

插入时从字符串末尾往前遍历，正好把后缀逐字符地挂到 Trie 上；查询时也从末尾往前走，走多深就是多长的公共后缀。每到一个节点，如果当前插入的字符串比之前记录的更短，就更新这个节点的最优解。

解法：字典树

#include <vector>
#include <string>
using namespace std;

struct TrieNode {
    TrieNode* children[26]; // 指向 26 个字母的子节点
    int minLen;             // 经过该节点的最短字符串长度
    int bestIdx;            // 对应的字符串下标
    TrieNode() : minLen(-1), bestIdx(-1) {
        for (int i = 0; i < 26; i++) children[i] = nullptr;
    }
};

class Trie {
private:
    TrieNode* root;
public:
    Trie() { root = new TrieNode(); }

    void insert(const string& word, int index) {
        TrieNode* node = root;
        int len = word.size();
        // 根节点也要记录，处理空后缀匹配的情况
        if (node->minLen == -1 || len < node->minLen) {
            node->minLen = len;
            node->bestIdx = index;
        }
        // 从末尾往前插入，把后缀转化为前缀
        for (int i = len - 1; i >= 0; i--) {
            int idx = word[i] - 'a';
            if (node->children[idx] == nullptr) {
                node->children[idx] = new TrieNode();
            }
            node = node->children[idx];
            if (node->minLen == -1 || len < node->minLen) {
                node->bestIdx = index;
                node->minLen = len;
            }
        }
    }

    int search(const string& word) {
        TrieNode* node = root;
        // 从末尾往前走，能走多深就是多长的公共后缀
        for (int i = word.size() - 1; i >= 0; i--) {
            int idx = word[i] - 'a';
            if (node->children[idx] == nullptr) break;
            node = node->children[idx];
        }
        return node->bestIdx;
    }
};

class Solution {
public:
    vector<int> stringIndices(vector<string>& wordsContainer, vector<string>& wordsQuery) {
        Trie trie;
        for (int i = 0; i < wordsContainer.size(); i++) {
            trie.insert(wordsContainer[i], i);
        }
        vector<int> ans;
        for (string& s : wordsQuery) {
            ans.push_back(trie.search(s));
        }
        return ans;
    }
};

这个版本逻辑正确，但提交后会内存超限。问题出在节点的内存开销上：64 位系统上一个指针占 8 字节，每个节点的 children[26] 数组就是 208 字节，加上 minLen 和 bestIdx，一个节点约 224 字节。字符串总长度可达 5 * 10^5，Trie 节点数接近这个量级，几十万个节点轻松突破内存限制。

优化的方法是把指针换成整数下标，用 vector<TrieNode> 连续存储所有节点。具体替换两处：

TrieNode* children[26] → int children[26]，用数组下标代替指针，0 表示子节点不存在，每个 children 数组从 208 字节降到 104 字节
TrieNode* root + new TrieNode() → vector<TrieNode> nodes，所有节点分配在连续数组中，下标 0 就是根节点，新节点通过 emplace_back 追加到数组末尾

节点总大小直接减半，连续内存布局还有缓存友好的附带好处。算法逻辑完全不变，只是存储方式从散落的堆分配变成了紧凑的数组：

#include <vector>
#include <string>
using namespace std;

struct TrieNode {
    int children[26]; // 子节点在数组中的下标，0 表示不存在
    int minLen;       // 经过该节点的最短字符串长度
    int bestIdx;      // 对应的字符串下标
    TrieNode() : minLen(-1), bestIdx(-1) {
        for (int i = 0; i < 26; i++) children[i] = 0;
    }
};

class Trie {
private:
    vector<TrieNode> nodes; // 连续数组存储所有节点，下标 0 为根
public:
    Trie() { nodes.emplace_back(); }

    void insert(const string& word, int index) {
        int curr = 0;
        int len = word.size();
        // 根节点也要记录，处理空后缀匹配的情况
        if (nodes[curr].minLen == -1 || len < nodes[curr].minLen) {
            nodes[curr].minLen = len;
            nodes[curr].bestIdx = index;
        }
        // 从末尾往前插入，把后缀转化为前缀
        for (int i = len - 1; i >= 0; i--) {
            int idx = word[i] - 'a';
            if (nodes[curr].children[idx] == 0) {
                nodes.emplace_back();
                nodes[curr].children[idx] = nodes.size() - 1;
            }
            curr = nodes[curr].children[idx];
            if (nodes[curr].minLen == -1 || len < nodes[curr].minLen) {
                nodes[curr].bestIdx = index;
                nodes[curr].minLen = len;
            }
        }
    }

    int search(const string& word) {
        int curr = 0;
        // 同样从末尾往前走，能走多深就是多长的公共后缀
        for (int i = word.size() - 1; i >= 0; i--) {
            int idx = word[i] - 'a';
            if (nodes[curr].children[idx] == 0) break;
            curr = nodes[curr].children[idx];
        }
        return nodes[curr].bestIdx;
    }
};

class Solution {
public:
    vector<int> stringIndices(vector<string>& wordsContainer, vector<string>& wordsQuery) {
        Trie trie;
        for (int i = 0; i < wordsContainer.size(); i++) {
            trie.insert(wordsContainer[i], i);
        }
        vector<int> ans;
        for (string& s : wordsQuery) {
            ans.push_back(trie.search(s));
        }
        return ans;
    }
};

复杂度： 时间 $O(S_1 + S_2)$ ， $S_1$ 和 $S_2$ 分别是两个数组中所有字符串的总长度；空间 $O(S_1)$ ，用于存储 Trie。

每日一题：最长公共后缀查询

题目描述

求解思路

解法：字典树

相关题目