fork download
  1. // Compact Suffix-Array-oriented template with 30 small helper functions.
  2. // Each helper is standalone (outside the class) so you can remove what you don't need.
  3. // All helper functions include a short "command" comment: what it does, inputs, outputs, complexity.
  4. // Build: C++17
  5.  
  6. #include <bits/stdc++.h>
  7. using namespace std;
  8. using ll = long long;
  9.  
  10. struct SuffixArray {
  11. // Build SA + LCP + RMQ over LCP.
  12. // Input: original string (no sentinel). Constructor adds sentinel internally.
  13. string s; // with sentinel at the end
  14. int n; // length including sentinel
  15. vector<int> sa; // sa[i] = starting index of i-th suffix in sorted order
  16. vector<int> rank_; // rank_[pos] = index in sa
  17. vector<int> lcp; // lcp[i] = LCP(sa[i], sa[i-1]) (lcp[0]=0)
  18. int LOG;
  19. vector<vector<int>> st; // sparse table over lcp
  20.  
  21. explicit SuffixArray(const string &orig = "") {
  22. if (orig.size()) build(orig);
  23. }
  24.  
  25. void build(const string &orig) {
  26. s = orig;
  27. s.push_back('$'); // sentinel
  28. n = (int)s.size();
  29. sa.assign(n, 0);
  30. rank_.assign(n, 0);
  31. lcp.assign(n, 0);
  32.  
  33. // SA build (O(n log n))
  34. vector<pair<char,int>> a(n);
  35. for (int i = 0; i < n; ++i) a[i] = {s[i], i};
  36. sort(a.begin(), a.end());
  37. for (int i = 0; i < n; ++i) sa[i] = a[i].second;
  38. vector<int> c(n);
  39. c[sa[0]] = 0;
  40. for (int i = 1; i < n; ++i)
  41. c[sa[i]] = c[sa[i-1]] + (a[i].first != a[i-1].first);
  42.  
  43. int k = 0;
  44. vector<int> pn(n), cn(n);
  45. while ((1<<k) < n) {
  46. for (int i = 0; i < n; ++i)
  47. pn[i] = (sa[i] - (1<<k) + n) % n;
  48. // counting sort by class c
  49. vector<int> cnt(n);
  50. for (int i = 0; i < n; ++i) cnt[c[i]]++;
  51. vector<int> pos(n);
  52. for (int i = 1; i < n; ++i) pos[i] = pos[i-1] + cnt[i-1];
  53. for (int x : pn) sa[pos[c[x]]++] = x;
  54. cn[sa[0]] = 0;
  55. for (int i = 1; i < n; ++i) {
  56. pair<int,int> prev = {c[sa[i-1]], c[(sa[i-1] + (1<<k)) % n]};
  57. pair<int,int> now = {c[sa[i]], c[(sa[i] + (1<<k)) % n]};
  58. cn[sa[i]] = cn[sa[i-1]] + (now != prev);
  59. }
  60. c = cn;
  61. ++k;
  62. if (c[sa.back()] == n-1) break;
  63. }
  64. for (int i = 0; i < n; ++i) rank_[sa[i]] = i;
  65.  
  66. // Kasai LCP (O(n))
  67. int h = 0;
  68. for (int i = 0; i < n; ++i) {
  69. int r = rank_[i];
  70. if (r == 0) { lcp[r] = 0; continue; }
  71. int j = sa[r-1];
  72. while (i+h < n && j+h < n && s[i+h] == s[j+h]) ++h;
  73. lcp[r] = h;
  74. if (h) --h;
  75. }
  76.  
  77. // build RMQ on lcp
  78. LOG = 1;
  79. while ((1<<LOG) <= n) ++LOG;
  80. st.assign(n, vector<int>(LOG, INT_MAX));
  81. for (int i = 0; i < n; ++i) st[i][0] = lcp[i];
  82. for (int j = 1; j < LOG; ++j)
  83. for (int i = 0; i + (1<<j) <= n; ++i)
  84. st[i][j] = min(st[i][j-1], st[i + (1<<(j-1))][j-1]);
  85. }
  86.  
  87. // RMQ on lcp by SA indices range [L+1..R] minimum (L < R)
  88. int lcp_rmq_by_sa_index(int L, int R) const {
  89. if (L > R) return 0;
  90. if (L == R) return lcp[L];
  91. int l = L+1, r = R;
  92. if (l > r) return 0;
  93. int len = r - l + 1;
  94. int k = 31 - __builtin_clz(len);
  95. return min(st[l][k], st[r - (1<<k) + 1][k]);
  96. }
  97.  
  98. // LCP between suffixes starting at positions i and j (0-based in original string)
  99. int lcp_between_positions(int i, int j) const {
  100. if (i == j) return (n - 1) - i;
  101. int ri = rank_[i], rj = rank_[j];
  102. if (ri > rj) swap(ri, rj);
  103. return lcp_rmq_by_sa_index(ri, rj);
  104. }
  105.  
  106. // helpers to expose
  107. const vector<int>& get_sa() const { return sa; }
  108. const vector<int>& get_lcp() const { return lcp; }
  109. int rank_of(int pos) const { return rank_[pos]; }
  110. const string& str_with_sentinel() const { return s; }
  111. };
  112.  
  113. // ---------------------- Helpers / "commands" (30) ----------------------
  114.  
  115. // 1) Pattern matching (exact search)
  116. // Command: sa_contains(sa, pattern)
  117. // Input: SuffixArray sa, pattern string
  118. // Output: bool true if pattern exists
  119. // Complexity: O(|P| log n)
  120. bool sa_contains(const SuffixArray &sa, const string &pattern) {
  121. const string &S = sa.str_with_sentinel();
  122. int n = (int)sa.get_sa().size();
  123. int L = 0, R = n-1;
  124. while (L <= R) {
  125. int mid = (L+R)>>1;
  126. int pos = sa.get_sa()[mid];
  127. int cmp = S.compare(pos, pattern.size(), pattern);
  128. if (cmp == 0) return true;
  129. if (cmp < 0) L = mid + 1;
  130. else R = mid - 1;
  131. }
  132. return false;
  133. }
  134.  
  135. // 2) LCP between two suffix starts (by position)
  136. // Command: sa_lcp_between(sa, i, j)
  137. // Input: SA, positions i,j (0-based original)
  138. // Output: int LCP length
  139. // Complexity: O(1) with RMQ
  140. int sa_lcp_between(const SuffixArray &sa, int i, int j) {
  141. return sa.lcp_between_positions(i, j);
  142. }
  143.  
  144. // 3) Longest repeated substring
  145. // Command: sa_longest_repeated(sa)
  146. // Input: SA
  147. // Output: longest repeated substring (may be empty)
  148. // Complexity: O(n)
  149. string sa_longest_repeated(const SuffixArray &sa) {
  150. const auto &lcp = sa.get_lcp();
  151. const auto &saarr = sa.get_sa();
  152. int idx = max_element(lcp.begin(), lcp.end()) - lcp.begin();
  153. if (lcp[idx] == 0) return string();
  154. const string &S = sa.str_with_sentinel();
  155. return S.substr(saarr[idx], lcp[idx]);
  156. }
  157.  
  158. // 4) Longest substring that appears >= k times
  159. // Command: sa_longest_at_least_k(sa, k)
  160. // Input: SA, integer k >= 2
  161. // Output: substring (empty if none)
  162. // Complexity: O(n) with RMQ
  163. string sa_longest_at_least_k(const SuffixArray &sa, int k) {
  164. if (k <= 1) return ""; // every substring appears >=1 times
  165. int nsa = (int)sa.get_sa().size();
  166. int best = 0, best_pos = -1;
  167. for (int i = 0; i + k - 1 < nsa; ++i) {
  168. int cur = sa.lcp_rmq_by_sa_index(i, i + k - 1);
  169. if (cur > best) { best = cur; best_pos = sa.get_sa()[i+1]; }
  170. }
  171. if (best == 0) return string();
  172. const string &S = sa.str_with_sentinel();
  173. return S.substr(best_pos, best);
  174. }
  175.  
  176. // 5) Longest palindromic substring (Manacher)
  177. // Command: longest_palindrome(s)
  178. // Input: string s
  179. // Output: longest palindromic substring
  180. // Complexity: O(n)
  181. string longest_palindrome(const string &s) {
  182. int n = s.size();
  183. if (n == 0) return "";
  184. // odd-length centers
  185. vector<int> d1(n);
  186. int l=0, r=-1;
  187. for (int i=0;i<n;i++){
  188. int k = (i>r)?1:min(d1[l + r - i], r - i + 1);
  189. while (0<=i-k && i+k<n && s[i-k]==s[i+k]) ++k;
  190. d1[i]=k--;
  191. if (i + k > r) { l = i - k; r = i + k; }
  192. }
  193. vector<int> d2(n); l=0; r=-1;
  194. for (int i=0;i<n;i++){
  195. int k = (i>r)?0:min(d2[l + r - i + 1], r - i + 1);
  196. while (0<=i-k-1 && i+k<n && s[i-k-1]==s[i+k]) ++k;
  197. d2[i]=k--;
  198. if (i + k > r) { l = i - k - 1; r = i + k; }
  199. }
  200. int best_len = 0, pos = 0;
  201. for (int i=0;i<n;i++){
  202. int len = d1[i]*2-1;
  203. if (len > best_len) { best_len = len; pos = i - d1[i] + 1; }
  204. len = d2[i]*2;
  205. if (len > best_len) { best_len = len; pos = i - d2[i]; }
  206. }
  207. return s.substr(pos, best_len);
  208. }
  209.  
  210. // 6) Longest common substring of two strings (via SA static helper)
  211. // Command: sa_lcs_two(a,b)
  212. // Input: two strings
  213. // Output: LCS string
  214. // Complexity: O(n log n) from SA build
  215. string sa_lcs_two(const string &a, const string &b) {
  216. // use unique small separators (chars 1 and 2)
  217. string sep1(1, char(1));
  218. string sep2(1, char(2));
  219. string comb = a + sep1 + b + sep2;
  220. SuffixArray sa(comb);
  221. int na = (int)a.size();
  222. int best = 0, pos = 0;
  223. const auto &saarr = sa.get_sa();
  224. const auto &lcp = sa.get_lcp();
  225. for (int i = 1; i < (int)saarr.size(); ++i) {
  226. int x = saarr[i], y = saarr[i-1];
  227. bool inA_x = x < na;
  228. bool inA_y = y < na;
  229. if (inA_x != inA_y) {
  230. if (lcp[i] > best) {
  231. best = lcp[i];
  232. pos = saarr[i];
  233. }
  234. }
  235. }
  236. if (best == 0) return string();
  237. return comb.substr(pos, best);
  238. }
  239.  
  240. // 7) Longest common substring of K strings (concatenate + sliding window)
  241. // Command: sa_lcs_k(vector<string> strs)
  242. // Input: vector<string> (K)
  243. // Output: longest substring common to all K
  244. // Complexity: O(total_length * log total_length)
  245. string sa_lcs_k(const vector<string> &strs) {
  246. int k = (int)strs.size();
  247. if (k == 0) return "";
  248. // assign separators chars 1..K (must not appear in inputs)
  249. string comb;
  250. vector<int> owner; // owner index for each position in comb
  251. for (int i = 0; i < k; ++i) {
  252. comb += strs[i];
  253. comb.push_back(char(1 + i)); // unique separator
  254. }
  255. SuffixArray sa(comb);
  256. int nsa = (int)sa.get_sa().size();
  257. vector<int> pos_owner(nsa);
  258. // map sa entry to owner group
  259. int idx = 0;
  260. vector<int> start_pos_owner((int)comb.size());
  261. // build owner by scanning original concatenation: for each position, determine which string it belonged to
  262. owner.assign((int)comb.size(), -1);
  263. int cur = 0;
  264. for (int i = 0; i < k; ++i) {
  265. for (char ch : strs[i]) {
  266. owner[cur++] = i;
  267. }
  268. owner[cur++] = -1; // separator
  269. }
  270. for (int i = 0; i < nsa; ++i) {
  271. int p = sa.get_sa()[i];
  272. pos_owner[i] = (p < (int)owner.size()) ? owner[p] : -1;
  273. }
  274. // sliding window on SA array to cover all k groups, keep min LCP in window
  275. vector<int> cnt(k, 0);
  276. int have = 0;
  277. int l = 0;
  278. int best = 0, bestpos = -1;
  279. for (int r = 0; r < nsa; ++r) {
  280. int o = pos_owner[r];
  281. if (o >= 0) { if (cnt[o]++ == 0) ++have; }
  282. while (have == k && l <= r) {
  283. // compute min LCP in [l..r] (meaning min of lcp[l+1..r])
  284. if (l < r) {
  285. int curmin = sa.lcp_rmq_by_sa_index(l, r);
  286. if (curmin > best) { best = curmin; bestpos = sa.get_sa()[l+1]; }
  287. }
  288. int ol = pos_owner[l];
  289. if (ol >= 0) { if (--cnt[ol] == 0) --have; }
  290. ++l;
  291. }
  292. }
  293. if (best == 0) return string();
  294. return comb.substr(bestpos, best);
  295. }
  296.  
  297. // 8) Count distinct substrings
  298. // Command: sa_count_distinct(sa)
  299. // Input: SA
  300. // Output: number of distinct substrings (ll)
  301. // Complexity: O(n)
  302. ll sa_count_distinct(const SuffixArray &sa) {
  303. ll ans = 0;
  304. const auto &saarr = sa.get_sa();
  305. const auto &lcp = sa.get_lcp();
  306. const string &S = sa.str_with_sentinel();
  307. int n = (int)saarr.size();
  308. for (int i = 1; i < n; ++i) {
  309. ll suf_len = (n - 1) - saarr[i];
  310. ans += max(0LL, suf_len - (ll)lcp[i]);
  311. }
  312. return ans;
  313. }
  314.  
  315. // 9) k-th lexicographic substring
  316. // Command: sa_kth_substring(sa, k) (1-based k)
  317. // Input: SA, k
  318. // Output: substring or empty if none
  319. // Complexity: O(n)
  320. string sa_kth_substring(const SuffixArray &sa, ll k) {
  321. const auto &saarr = sa.get_sa();
  322. const auto &lcp = sa.get_lcp();
  323. const string &S = sa.str_with_sentinel();
  324. int n = (int)saarr.size();
  325. for (int i = 1; i < n; ++i) {
  326. ll suf_len = (n - 1) - saarr[i];
  327. ll newsubs = max(0LL, suf_len - (ll)lcp[i]);
  328. if (k > newsubs) k -= newsubs;
  329. else {
  330. ll take = lcp[i] + k;
  331. return S.substr(saarr[i], (size_t)take);
  332. }
  333. }
  334. return string();
  335. }
  336.  
  337. // 10) Occurrences count of all distinct substrings (small strings only)
  338. // Command: occurrences_all_small(s)
  339. // Input: original string s (recommend n<=2000)
  340. // Output: vector of (substring, count)
  341. // Complexity: O(n^2 log n) memory/time heavy for big n
  342. vector<pair<string,int>> occurrences_all_small(const string &s) {
  343. int n = (int)s.size();
  344. if (n > 2000) return {}; // avoid explosion; caller may prune
  345. unordered_map<string,int> cnt;
  346. for (int i = 0; i < n; ++i)
  347. for (int len = 1; len <= n - i; ++len)
  348. ++cnt[s.substr(i,len)];
  349. vector<pair<string,int>> out;
  350. out.reserve(cnt.size());
  351. for (auto &kv : cnt) out.emplace_back(kv.first, kv.second);
  352. sort(out.begin(), out.end());
  353. return out;
  354. }
  355.  
  356. // 11) Compare two substrings lexicographically in O(1)
  357. // Command: compare_substrings(sa, l1, r1, l2, r2)
  358. // Input: SA, inclusive indices l1..r1 and l2..r2 (0-based original string)
  359. // Output: -1 if S[l1..r1] < S[l2..r2], 0 if equal, 1 if greater
  360. // Complexity: O(1)
  361. int compare_substrings(const SuffixArray &sa, int l1, int r1, int l2, int r2) {
  362. int len1 = r1 - l1 + 1, len2 = r2 - l2 + 1;
  363. int L = sa.lcp_between_positions(l1, l2);
  364. if (L >= min(len1, len2)) {
  365. if (len1 == len2) return 0;
  366. return (len1 < len2) ? -1 : 1;
  367. }
  368. char a = sa.str_with_sentinel()[l1 + L];
  369. char b = sa.str_with_sentinel()[l2 + L];
  370. return (a < b) ? -1 : 1;
  371. }
  372.  
  373. // 12) Longest Previous Factor (LPF) naive for small n
  374. // Command: compute_lpf_naive(s)
  375. // Input: string s
  376. // Output: vector<int> LPF where LPF[i] = length of longest substring starting at i that appears earlier
  377. // Complexity: O(n^2) naive
  378. vector<int> compute_lpf_naive(const string &s) {
  379. int n = (int)s.size();
  380. vector<int> lpf(n, 0);
  381. for (int i = 0; i < n; ++i) {
  382. for (int j = 0; j < i; ++j) {
  383. int k = 0;
  384. while (i + k < n && s[j + k] == s[i + k]) ++k;
  385. lpf[i] = max(lpf[i], k);
  386. }
  387. }
  388. return lpf;
  389. }
  390.  
  391. // 13) LZ77 naive factorization (triples (pos,len,nextchar))
  392. // Command: lz77_naive(s)
  393. // Input: s
  394. // Output: vector of tuples (pos,len,nextchar) where pos=previous-match position or 0
  395. // Complexity: O(n^2)
  396. struct LZ77Token { int pos, len; char next; };
  397. vector<LZ77Token> lz77_naive(const string &s) {
  398. int n = (int)s.size();
  399. vector<LZ77Token> out;
  400. for (int i = 0; i < n; ) {
  401. int best_pos = 0, best_len = 0;
  402. for (int j = 0; j < i; ++j) {
  403. int k = 0;
  404. while (i + k < n && s[j + k] == s[i + k]) ++k;
  405. if (k > best_len) { best_len = k; best_pos = j; }
  406. }
  407. char nextch = (i + best_len < n) ? s[i + best_len] : 0;
  408. out.push_back({best_pos, best_len, nextch});
  409. i += max(1, best_len + (nextch ? 1 : 0));
  410. }
  411. return out;
  412. }
  413.  
  414. // 14) Burrows-Wheeler Transform (BWT)
  415. // Command: build_bwt(sa)
  416. // Input: SA built on string with sentinel
  417. // Output: BWT string (length n)
  418. string build_bwt(const SuffixArray &sa) {
  419. const auto &saarr = sa.get_sa();
  420. const string &S = sa.str_with_sentinel();
  421. int n = (int)saarr.size();
  422. string bwt; bwt.resize(n);
  423. for (int i = 0; i < n; ++i) {
  424. int p = saarr[i];
  425. int idx = (p - 1 + n) % n;
  426. bwt[i] = S[idx];
  427. }
  428. return bwt;
  429. }
  430.  
  431. // 15) Lightweight FM-index build + backward_search (counts)
  432. // Command: build_fm_index(sa) then fm_count(pattern)
  433. // Input: SA
  434. // Output: FMIndex object and count method
  435. // Complexity: build O(n * alphabet), query O(|P|)
  436. struct FMIndex {
  437. string bwt;
  438. map<char,int> C; // total chars < c in lexicographical order
  439. unordered_map<char, vector<int>> occ; // occ[c][i] = occurrences of c in bwt[0..i-1]
  440. vector<char> alphabet;
  441. FMIndex() = default;
  442. explicit FMIndex(const SuffixArray &sa) {
  443. bwt = build_bwt(sa);
  444. int n = (int)bwt.size();
  445. map<char,int> freq;
  446. for (char ch : bwt) ++freq[ch];
  447. int sum = 0;
  448. for (auto &kv : freq) {
  449. C[kv.first] = sum;
  450. alphabet.push_back(kv.first);
  451. sum += kv.second;
  452. }
  453. for (char ch : alphabet) {
  454. occ[ch].assign(n+1, 0);
  455. }
  456. for (int i = 0; i < n; ++i) {
  457. for (char ch : alphabet) occ[ch][i+1] = occ[ch][i];
  458. occ[bwt[i]][i+1]++;
  459. }
  460. }
  461. // backward search returns [L,R] in SA (inclusive) or empty pair if none
  462. pair<int,int> backward_search(const string &pat) const {
  463. int n = (int)bwt.size();
  464. int L = 0, R = n - 1;
  465. for (int i = (int)pat.size()-1; i >= 0; --i) {
  466. char c = pat[i];
  467. if (C.find(c) == C.end()) return {-1,-1};
  468. int cStart = C.at(c);
  469. int occL = occ.at(c)[L];
  470. int occR = occ.at(c)[R+1];
  471. if (occR - occL == 0) return {-1,-1};
  472. L = cStart + occL;
  473. R = cStart + occR - 1;
  474. }
  475. return {L,R};
  476. }
  477. ll count(const string &pat) const {
  478. auto pr = backward_search(pat);
  479. if (pr.first == -1) return 0;
  480. return (ll)pr.second - pr.first + 1;
  481. }
  482. };
  483.  
  484. // 16) Minimal rotation (Booth)
  485. // Command: minimal_rotation(s)
  486. // Input: string s
  487. // Output: index of lexicographically minimal rotation (0-based), and rotation string
  488. // Complexity: O(n)
  489. int minimal_rotation_index(const string &s) {
  490. string ss = s + s;
  491. int n = s.size();
  492. int i = 0, j = 1, k = 0;
  493. while (i < n && j < n && k < n) {
  494. if (ss[i+k] == ss[j+k]) ++k;
  495. else if (ss[i+k] > ss[j+k]) { i = i + k + 1; if (i <= j) i = j+1; k = 0; }
  496. else { j = j + k + 1; if (j <= i) j = i+1; k = 0; }
  497. }
  498. return min(i, j);
  499. }
  500. string minimal_rotation(const string &s) {
  501. int idx = minimal_rotation_index(s);
  502. string r = s.substr(idx) + s.substr(0, idx);
  503. return r;
  504. }
  505.  
  506. // 17) FM-index multi-search wrapper
  507. // Command: fm_multi_search(fm, pattern)
  508. // Input: FMIndex, pattern
  509. // Output: count of occurrences
  510. ll fm_multi_search(const FMIndex &fm, const string &pat) {
  511. return fm.count(pat);
  512. }
  513.  
  514. // 18) Nearest lexicographic neighbors for a pattern
  515. // Command: nearest_lex_neighbors(sa, pattern)
  516. // Input: SA, pattern
  517. // Output: pair(prev_suffix_pos, next_suffix_pos) (-1 if none)
  518. // Complexity: O(log n)
  519. pair<int,int> nearest_lex_neighbors(const SuffixArray &sa, const string &pattern) {
  520. const string &S = sa.str_with_sentinel();
  521. int n = (int)sa.get_sa().size();
  522. int L = 0, R = n-1, ansLo = n, ansHi = -1;
  523. while (L <= R) {
  524. int mid = (L+R)>>1;
  525. int pos = sa.get_sa()[mid];
  526. int cmp = S.compare(pos, pattern.size(), pattern);
  527. if (cmp < 0) L = mid + 1;
  528. else { ansLo = mid; R = mid - 1; }
  529. }
  530. L = 0; R = n-1;
  531. while (L <= R) {
  532. int mid = (L+R)>>1;
  533. int pos = sa.get_sa()[mid];
  534. int cmp = S.compare(pos, pattern.size(), pattern);
  535. if (cmp <= 0) { ansHi = mid; L = mid + 1; }
  536. else R = mid - 1;
  537. }
  538. int prev_pos = (ansLo <= 0) ? -1 : sa.get_sa()[ansLo-1];
  539. int next_pos = (ansHi+1 >= n) ? -1 : sa.get_sa()[ansHi+1];
  540. return {prev_pos, next_pos};
  541. }
  542.  
  543. // 19) Build Suffix Automaton (simple implementation)
  544. // Command: build_sam(s)
  545. // Input: string s
  546. // Output: SAM structure (vector of transitions/states)
  547. // Complexity: O(n * alphabet)
  548. struct SAMState {
  549. int link = -1;
  550. int len = 0;
  551. unordered_map<char,int> next;
  552. };
  553. vector<SAMState> build_sam(const string &s) {
  554. vector<SAMState> st;
  555. st.push_back(SAMState()); // root
  556. int last = 0;
  557. for (char c : s) {
  558. int cur = st.size(); st.push_back(SAMState());
  559. st[cur].len = st[last].len + 1;
  560. int p = last;
  561. for (; p != -1 && !st[p].next.count(c); p = st[p].link) st[p].next[c] = cur;
  562. if (p == -1) st[cur].link = 0;
  563. else {
  564. int q = st[p].next[c];
  565. if (st[p].len + 1 == st[q].len) st[cur].link = q;
  566. else {
  567. int clone = st.size(); st.push_back(st[q]);
  568. st[clone].len = st[p].len + 1;
  569. for (; p != -1 && st[p].next[c] == q; p = st[p].link) st[p].next[c] = clone;
  570. st[q].link = st[cur].link = clone;
  571. }
  572. }
  573. last = cur;
  574. }
  575. return st;
  576. }
  577.  
  578. // 20) Flatten grid rows into a single string with separators (helper for 2D string SA)
  579. // Command: concat_rows(rows, sep_char)
  580. // Input: vector<string> rows, separator char
  581. // Output: concatenated string
  582. string concat_rows(const vector<string> &rows, char sep = '\1') {
  583. string out;
  584. for (size_t i = 0; i < rows.size(); ++i) {
  585. out += rows[i];
  586. if (i + 1 < rows.size()) out.push_back(sep);
  587. }
  588. return out;
  589. }
  590.  
  591. // 21) Find periodic runs using Z-algorithm (simple repeats finder)
  592. // Command: find_runs(s)
  593. // Input: string s
  594. // Output: vector of (start, length, period) for runs found (heuristic)
  595. // Complexity: O(n^2) worst-case (naive check), typically OK for medium n
  596. vector<tuple<int,int,int>> find_runs(const string &s) {
  597. int n = (int)s.size();
  598. vector<tuple<int,int,int>> out;
  599. // naive: for each start and period p, check repeats
  600. for (int p = 1; p <= n/2; ++p) {
  601. for (int i = 0; i + p < n; ++i) {
  602. int len = 0;
  603. while (i + len + p < n && s[i+len] == s[i+len+p]) ++len;
  604. if (len >= p) out.emplace_back(i, len + p, p);
  605. }
  606. }
  607. return out;
  608. }
  609.  
  610. // 22) Circular string matching
  611. // Command: find_in_circular(text, pattern)
  612. // Input: text, pattern
  613. // Output: vector of starting positions in text (0..n-1)
  614. // Complexity: O(n + m) via KMP on text+text
  615. vector<int> find_in_circular(const string &text, const string &pat) {
  616. int n = (int)text.size(), m = (int)pat.size();
  617. if (m > n) return {};
  618. string T = text + text.substr(0, m-1);
  619. // KMP
  620. vector<int> pi(m);
  621. for (int i = 1; i < m; ++i) {
  622. int j = pi[i-1];
  623. while (j > 0 && pat[i] != pat[j]) j = pi[j-1];
  624. if (pat[i] == pat[j]) ++j;
  625. pi[i] = j;
  626. }
  627. vector<int> ans;
  628. int j = 0;
  629. for (int i = 0; i < (int)T.size(); ++i) {
  630. while (j > 0 && (j >= m || T[i] != pat[j])) j = pi[j-1];
  631. if (T[i] == pat[j]) ++j;
  632. if (j == m) {
  633. int st = i - m + 1;
  634. if (st < n) ans.push_back(st);
  635. j = pi[j-1];
  636. }
  637. }
  638. sort(ans.begin(), ans.end());
  639. ans.erase(unique(ans.begin(), ans.end()), ans.end());
  640. return ans;
  641. }
  642.  
  643. // 23) Multiple pattern query using SA counts
  644. // Command: sa_multi_count(sa, patterns)
  645. // Input: SA, vector patterns
  646. // Output: vector counts
  647. // Complexity: O(sum |P| * log n)
  648. vector<ll> sa_multi_count(const SuffixArray &sa, const vector<string> &patterns) {
  649. vector<ll> ans; ans.reserve(patterns.size());
  650. FMIndex fm(sa);
  651. for (auto &p : patterns) ans.push_back(fm.count(p));
  652. return ans;
  653. }
  654.  
  655. // 24) All borders of a substring [l..r]
  656. // Command: borders_of_substring(s, l, r)
  657. // Input: original string s, inclusive l..r
  658. // Output: vector of border lengths (descending)
  659. // Complexity: O(len)
  660. vector<int> borders_of_substring(const string &s, int l, int r) {
  661. string sub = s.substr(l, r - l + 1);
  662. int m = (int)sub.size();
  663. vector<int> pi(m);
  664. for (int i = 1; i < m; ++i) {
  665. int j = pi[i-1];
  666. while (j > 0 && sub[i] != sub[j]) j = pi[j-1];
  667. if (sub[i] == sub[j]) ++j;
  668. pi[i] = j;
  669. }
  670. vector<int> res;
  671. int cur = pi[m-1];
  672. while (cur > 0) { res.push_back(cur); cur = pi[cur-1]; }
  673. return res;
  674. }
  675.  
  676. // 25) Detect if substring S[l..r] is unique in the string
  677. // Command: is_unique_substring(sa, l, r)
  678. // Input: SA, l,r
  679. // Output: bool
  680. // Complexity: O(log n)
  681. bool is_unique_substring(const SuffixArray &sa, int l, int r) {
  682. const string &S = sa.str_with_sentinel();
  683. string pat = S.substr(l, r - l + 1);
  684. FMIndex fm(sa);
  685. return fm.count(pat) == 1;
  686. }
  687.  
  688. // 26) Nearest lexicographic matches (already provided above as nearest_lex_neighbors)
  689.  
  690. // 27) Substring frequency query by indices [l..r]
  691. // Command: substring_freq_query(sa, l, r)
  692. // Input: SA, l,r (0-based original)
  693. // Output: count (ll)
  694. // Complexity: O(|sub| log n)
  695. ll substring_freq_query(const SuffixArray &sa, int l, int r) {
  696. const string &S = sa.str_with_sentinel();
  697. string pat = S.substr(l, r - l + 1);
  698. FMIndex fm(sa);
  699. return fm.count(pat);
  700. }
  701.  
  702. // 28) Build Euler-tour + concatenation for tree (helper)
  703. // Command: tree_euler_concat(n, edges, labels, sep)
  704. // Input: n, edges list (0-based), labels per node (string per node), sep char
  705. // Output: concatenated string representing Euler tour with separators
  706. // Complexity: O(n)
  707. string tree_euler_concat(int n, const vector<vector<int>>& adj, const vector<string>& labels, char sep = '\1') {
  708. string out;
  709. vector<int> vis(n,0);
  710. function<void(int)> dfs = [&](int u){
  711. vis[u]=1;
  712. out += labels[u];
  713. out.push_back(sep);
  714. for (int v: adj[u]) if(!vis[v]) dfs(v);
  715. out.push_back(sep);
  716. };
  717. for (int i = 0; i < n; ++i) if(!vis[i]) dfs(i);
  718. return out;
  719. }
  720.  
  721. // 29) DNA indexing helpers: reuse FMIndex and BWT (already implemented)
  722.  
  723. // 30) LCP stats (max, average, median)
  724. // Command: lcp_stats(sa)
  725. // Input: SA
  726. // Output: tuple(max, average, median)
  727. // Complexity: O(n log n) for median via sort
  728. tuple<int,double,int> lcp_stats(const SuffixArray &sa) {
  729. vector<int> arr = sa.get_lcp();
  730. if (arr.size() <= 1) return {0,0.0,0};
  731. vector<int> v(arr.begin()+1, arr.end());
  732. int mx = *max_element(v.begin(), v.end());
  733. double avg = 0;
  734. for (int x : v) avg += x;
  735. avg /= v.size();
  736. sort(v.begin(), v.end());
  737. int med = v[v.size()/2];
  738. return {mx, avg, med};
  739. }
  740.  
  741. // ------------------------ small example usage (demo) ------------------------
  742. int main() {
  743. string s = "banana";
  744. SuffixArray sa(s);
  745.  
  746. // 1) contains
  747. cout << "contains 'ana': " << sa_contains(sa, "ana") << "\n";
  748.  
  749. // 3) longest repeated
  750. cout << "LRS: " << sa_longest_repeated(sa) << "\n";
  751.  
  752. // 5) longest palindrome
  753. cout << "Longest palindrome in 'abacdfgdcaba': " << longest_palindrome("abacdfgdcaba") << "\n";
  754.  
  755. // 9) kth substring
  756. cout << "3rd substring: " << sa_kth_substring(sa, 3) << "\n";
  757.  
  758. // 14) BWT
  759. cout << "BWT: " << build_bwt(sa) << "\n";
  760.  
  761. // 15) FM-index count
  762. FMIndex fm(sa);
  763. cout << "count 'ana' via FM: " << fm.count("ana") << "\n";
  764.  
  765. // 16) minimal rotation
  766. cout << "min rotation of 'bba': " << minimal_rotation("bba") << "\n";
  767.  
  768. // 30) lcp stats
  769. auto [mx, avg, med] = lcp_stats(sa);
  770. cout << "LCP max=" << mx << " avg=" << avg << " med=" << med << "\n";
  771. return 0;
  772. }
  773.  
  774.  
Success #stdin #stdout 0.01s 5324KB
stdin
Standard input is empty
stdout
contains 'ana': 1
LRS: ana
Longest palindrome in 'abacdfgdcaba': aba
3rd substring: ana
BWT: annb$aa
count 'ana' via FM: 2
min rotation of 'bba': abb
LCP max=3 avg=1 med=1