Given two strings X and Y, the task is to find the length of the longest common substring.
Examples:
Input: X = “GeeksforGeeks”, y = “GeeksQuiz” Output: 5 Explanation: The longest common substring is “Geeks” and is of length 5.
Input: X = “abcdxyz”, y = “xyzabcd” Output: 4 Explanation: The longest common substring is “abcd” and is of length 4.
Input: X = “zxabcdezy”, y = “yzabcdezx” Output: 6 Explanation: The longest common substring is “abcdez” and is of length 6.
Longest Common Substring using Dynamic Programming:
This problem can be solved using dynamic programming in O(len(X) * len(Y)), see this. In this article we are going to discuss about an efficient approach.
Longest Common Substring using Binary Search and Rolling Hash
Pre-requisites:
Observation:
If there is a common substring of length K in both the strings, then there will be common substrings of length 0, 1, …, K – 1. Hence, binary search on answer can be applied.\
Follow the below steps to implement the idea:
- Smallest possible answer(low) = 0 and largest possible answer(high) = min(len(X), len(Y)), Range of binary search will be [0, min(len(X), len(Y))].
- For every mid, check if there exists a common substring of length mid, if exists then update low, else update high.
- To check the existence of a common substring of length K, Polynomial rolling hash function can be used.
- Iterate over all the windows of size K in string X and string Y and get the hash.
- If there is a common hash return True, else return False.
Below is the implementation of this approach.
C++
#include <iostream>
#include <cmath>
#include <vector>
class ComputeHash {
private :
std::vector< long > hash;
std::vector< long > invMod;
long mod;
long p;
public :
ComputeHash(std::string s, long p, long mod) {
int n = s.length();
this ->hash.resize(n);
this ->invMod.resize(n);
this ->mod = mod;
this ->p = p;
long pPow = 1;
long hashValue = 0;
for ( int i = 0; i < n; i++) {
char c = s[i];
c = static_cast < char >(c - 'A' + 1);
hashValue = (hashValue + c * pPow) % this ->mod;
this ->hash[i] = hashValue;
this ->invMod[i] = static_cast < long >(std:: pow (pPow, this ->mod - 2)) % this ->mod;
pPow = (pPow * this ->p) % this ->mod;
}
}
long getHash( int l, int r) {
if (l == 0) {
return this ->hash[r];
}
long window = ( this ->hash[r] - this ->hash[l - 1] + this ->mod) % this ->mod;
return (window * this ->invMod[l]) % this ->mod;
}
};
bool exists( int k, std::string X, std::string Y, ComputeHash &hashX1,
ComputeHash &hashX2, ComputeHash &hashY1, ComputeHash &hashY2) {
for ( int i = 0; i <= X.length() - k; i++) {
for ( int j = 0; j <= Y.length() - k; j++) {
if (X.substr(i, k) == Y.substr(j, k)) {
return true ;
}
}
}
return false ;
}
int longestCommonSubstr(std::string X, std::string Y) {
int n = X.length();
int m = Y.length();
long p1 = 31;
long p2 = 37;
long m1 = static_cast < long >(std:: pow (10, 9) + 9);
long m2 = static_cast < long >(std:: pow (10, 9) + 7);
ComputeHash hashX1(X, p1, m1);
ComputeHash hashX2(X, p2, m2);
ComputeHash hashY1(Y, p1, m1);
ComputeHash hashY2(Y, p2, m2);
int low = 0, high = std::min(n, m);
int answer = 0;
while (low <= high) {
int mid = (low + high) / 2;
if (exists(mid, X, Y, hashX1, hashX2, hashY1, hashY2)) {
answer = mid;
low = mid + 1;
} else {
high = mid - 1;
}
}
return answer;
}
int main() {
std::string X = "GeeksforGeeks" ;
std::string Y = "GeeksQuiz" ;
std::cout << longestCommonSubstr(X, Y) << std::endl;
return 0;
}
|
Java
import java.util.HashSet;
import java.util.Set;
class ComputeHash {
private long [] hash;
private long [] invMod;
private long mod;
private long p;
public ComputeHash(String s, long p, long mod) {
int n = s.length();
this .hash = new long [n];
this .invMod = new long [n];
this .mod = mod;
this .p = p;
long pPow = 1 ;
long hashValue = 0 ;
for ( int i = 0 ; i < n; i++) {
char c = s.charAt(i);
c = ( char ) (c - 'A' + 1 );
hashValue = (hashValue + c * pPow) % this .mod;
this .hash[i] = hashValue;
this .invMod[i] = ( long )(Math.pow(pPow, this .mod - 2 ) % this .mod);
pPow = (pPow * this .p) % this .mod;
}
}
public long getHash( int l, int r) {
if (l == 0 ) {
return this .hash[r];
}
long window = ( this .hash[r] - this .hash[l - 1 ]) % this .mod;
return (window * this .invMod[l]) % this .mod;
}
}
public class Main {
public static int longestCommonSubstr(String X, String Y) {
int n = X.length();
int m = Y.length();
long p1 = 31 ;
long p2 = 37 ;
long m1 = ( long ) (Math.pow( 10 , 9 ) + 9 );
long m2 = ( long ) (Math.pow( 10 , 9 ) + 7 );
ComputeHash hashX1 = new ComputeHash(X, p1, m1);
ComputeHash hashX2 = new ComputeHash(X, p2, m2);
ComputeHash hashY1 = new ComputeHash(Y, p1, m1);
ComputeHash hashY2 = new ComputeHash(Y, p2, m2);
int low = 0 , high = Math.min(n, m);
int answer = 0 ;
while (low <= high) {
int mid = (low + high) / 2 ;
if (exists(mid, X, Y)) {
answer = mid;
low = mid + 1 ;
} else {
high = mid - 1 ;
}
}
return answer;
}
private static boolean exists( int k, String X, String Y) {
for ( int i = 0 ; i <= X.length() - k; i++) {
for ( int j = 0 ; j <= Y.length() - k; j++) {
if (X.substring(i, i + k).equals(Y.substring(j, j + k))) {
return true ;
}
}
}
return false ;
}
public static void main(String[] args) {
String X = "GeeksforGeeks" ;
String Y = "GeeksQuiz" ;
System.out.println(longestCommonSubstr(X, Y));
}
}
|
Python3
class ComputeHash:
def __init__( self , s, p, mod):
n = len (s)
self . hash = [ 0 ] * n
self .inv_mod = [ 0 ] * n
self .mod = mod
self .p = p
p_pow = 1
hash_value = 0
for i in range (n):
c = ord (s[i]) - 65 + 1
hash_value = (hash_value + c * p_pow) % self .mod
self . hash [i] = hash_value
self .inv_mod[i] = pow (p_pow, self .mod - 2 , self .mod)
p_pow = (p_pow * self .p) % self .mod
def get_hash( self , l, r):
if l = = 0 :
return self . hash [r]
window = ( self . hash [r] - self . hash [l - 1 ]) % self .mod
return (window * self .inv_mod[l]) % self .mod
def longestCommonSubstr(X, Y, n, m):
p1, p2 = 31 , 37
m1, m2 = pow ( 10 , 9 ) + 9 , pow ( 10 , 9 ) + 7
hash_X1 = ComputeHash(X, p1, m1)
hash_X2 = ComputeHash(X, p2, m2)
hash_Y1 = ComputeHash(Y, p1, m1)
hash_Y2 = ComputeHash(Y, p2, m2)
def exists(k):
if k = = 0 :
return True
st = set ()
for i in range (n - k + 1 ):
h1 = hash_X1.get_hash(i, i + k - 1 )
h2 = hash_X2.get_hash(i, i + k - 1 )
cur_window_hash = (h1, h2)
st.add(cur_window_hash)
for i in range (m - k + 1 ):
h1 = hash_Y1.get_hash(i, i + k - 1 )
h2 = hash_Y2.get_hash(i, i + k - 1 )
cur_window_hash = (h1, h2)
if cur_window_hash in st:
return True
return False
answer = 0
low, high = 0 , min (n, m)
while low < = high:
mid = (low + high) / / 2
if exists(mid):
answer = mid
low = mid + 1
else :
high = mid - 1
return answer
if __name__ = = '__main__' :
X = 'GeeksforGeeks'
Y = 'GeeksQuiz'
print (longestCommonSubstr(X, Y, len (X), len (Y)))
|
C#
using System;
using System.Collections.Generic;
class ComputeHash
{
private List< long > hash;
private List< long > invMod;
private long mod;
private long p;
public ComputeHash( string s, long p, long mod)
{
int n = s.Length;
this .hash = new List< long >(n);
this .invMod = new List< long >(n);
this .mod = mod;
this .p = p;
long pPow = 1;
long hashValue = 0;
for ( int i = 0; i < n; i++)
{
char c = s[i];
c = ( char )(c - 'A' + 1);
hashValue = (hashValue + c * pPow) % this .mod;
this .hash.Add(hashValue);
this .invMod.Add(ModularInverse(pPow, this .mod));
pPow = (pPow * this .p) % this .mod;
}
}
private long ModularInverse( long a, long m)
{
long m0 = m, t, q;
long x0 = 0, x1 = 1;
if (m == 1)
return 0;
while (a > 1)
{
q = a / m;
t = m;
m = a % m;
a = t;
t = x0;
x0 = x1 - q * x0;
x1 = t;
}
if (x1 < 0)
x1 += m0;
return x1;
}
public long GetHash( int l, int r)
{
if (l == 0)
{
return this .hash[r];
}
long window = ( this .hash[r] - this .hash[l - 1] + this .mod) % this .mod;
return (window * this .invMod[l]) % this .mod;
}
}
class LongestCommonSubstring
{
private static bool Exists( int k, string X, string Y,
ComputeHash hashX1, ComputeHash hashX2,
ComputeHash hashY1, ComputeHash hashY2)
{
for ( int i = 0; i <= X.Length - k; i++)
{
for ( int j = 0; j <= Y.Length - k; j++)
{
if (X.Substring(i, k) == Y.Substring(j, k))
{
return true ;
}
}
}
return false ;
}
private static int LongestCommonSubstr( string X, string Y)
{
int n = X.Length;
int m = Y.Length;
long p1 = 31;
long p2 = 37;
long m1 = ( long )Math.Pow(10, 9) + 9;
long m2 = ( long )Math.Pow(10, 9) + 7;
ComputeHash hashX1 = new ComputeHash(X, p1, m1);
ComputeHash hashX2 = new ComputeHash(X, p2, m2);
ComputeHash hashY1 = new ComputeHash(Y, p1, m1);
ComputeHash hashY2 = new ComputeHash(Y, p2, m2);
int low = 0, high = Math.Min(n, m);
int answer = 0;
while (low <= high)
{
int mid = (low + high) / 2;
if (Exists(mid, X, Y, hashX1, hashX2, hashY1, hashY2))
{
answer = mid;
low = mid + 1;
}
else
{
high = mid - 1;
}
}
return answer;
}
static void Main()
{
string X = "GeeksforGeeks" ;
string Y = "GeeksQuiz" ;
Console.WriteLine(LongestCommonSubstr(X, Y));
}
}
|
Javascript
class ComputeHash {
constructor(mod) {
this .mod = mod;
}
compute(s) {
let hashValue = 0n;
let pPow = 1n;
for (let i = 0; i < s.length; i++) {
let c = BigInt(s.charCodeAt(i) - "A" .charCodeAt(0) + 1);
hashValue = (hashValue + c * pPow) % this .mod;
pPow = pPow * 26n % this .mod;
}
return hashValue;
}
}
function longestCommonSubstr(s, t) {
const mod = BigInt(10**9+7);
const p1 = new ComputeHash(mod);
const p2 = new ComputeHash(mod);
let left = 0, right = Math.min(s.length, t.length);
while (left < right) {
const mid = Math.floor((left + right + 1) / 2);
let found = false ;
const set = new Set();
for (let i = 0; i + mid <= s.length; i++) {
const hashValue = p1.compute(s.substring(i, i + mid));
set.add(hashValue);
}
for (let i = 0; i + mid <= t.length; i++) {
const hashValue = p2.compute(t.substring(i, i + mid));
if (set.has(hashValue)) {
found = true ;
break ;
}
}
if (found) {
left = mid;
} else {
right = mid - 1;
}
}
return left;
}
console.log(longestCommonSubstr( "ABABAB" , "BABABA" ));
|
Time Complexity: O(n * log(m1)) + O(m * log((m1)) + O((n + m) * log(min(n, m)))
- Generating hash object takes O(n*log(m1)), where n is the length of string and m1 = pow(10, 9) + 7.
- Binary search takes O(log(min(n, m))), where n, m are the lengths of both strings.
- Hash of a window takes O(1) time.
- Exist function takes O(n + m) time.
Auxiliary Space: O(n + m)
|