forked from UTSAVS26/PyVerse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
boyer_moore.py
49 lines (39 loc) · 1.34 KB
/
boyer_moore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# boyer_moore.py
def bad_character_heuristic(pattern):
"""
Preprocesses the pattern to create the bad character table.
Parameters:
pattern (str): The pattern to preprocess.
Returns:
dict: A dictionary mapping characters to their last occurrence index.
"""
bad_char = {}
for i in range(len(pattern)):
bad_char[pattern[i]] = i
return bad_char
def boyer_moore(text, pattern):
"""
Boyer-Moore algorithm for pattern searching.
This function finds all occurrences of 'pattern' in 'text'
using the Boyer-Moore algorithm, which skips sections of the text.
Parameters:
text (str): The text in which to search for the pattern.
pattern (str): The pattern to search for.
Prints the starting index of each occurrence of the pattern.
"""
bad_char = bad_character_heuristic(pattern)
m = len(pattern)
n = len(text)
s = 0 # Shift of the pattern with respect to text
while s <= n - m:
j = m - 1
while j >= 0 and pattern[j] == text[s + j]:
j -= 1
if j < 0:
print(f"Pattern found at index {s}")
s += (m - bad_char.get(text[s + m], -1)) if s + m < n else 1
else:
s += max(1, j - bad_char.get(text[s + j], -1))
# Example usage
if __name__ == "__main__":
boyer_moore("ababcabcab", "abc")