Skip to content

Commit 70ae493

Browse files
committed
Improve support searching indexes
1 parent 8ce6797 commit 70ae493

11 files changed

+531
-148
lines changed

dissect/esedb/btree.py

+156
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
from dissect.esedb.exceptions import KeyNotFoundError, NoNeighbourPageError
6+
7+
if TYPE_CHECKING:
8+
from dissect.esedb.esedb import EseDB
9+
from dissect.esedb.page import Node, Page
10+
11+
12+
class BTree:
    """A simple implementation for searching the ESE B+Trees.

    This is a stateful interactive class that moves an internal cursor to a position within the BTree.
    The cursor consists of the current page, the number of that page and the index of the current node
    on that page.

    Args:
        esedb: An instance of :class:`~dissect.esedb.esedb.EseDB`.
        root: The root page to open a BTree on, either as a page number or as an already loaded page.
    """

    def __init__(self, esedb: EseDB, root: int | Page):
        self.esedb = esedb

        # Accept both a page number and a page object; a page number is loaded through the database
        if isinstance(root, int):
            page_num = root
            root = esedb.page(page_num)
        else:
            page_num = root.num

        self.root = root

        # Cursor state: current page, its page number and the node index within that page
        self._page = root
        self._page_num = page_num
        self._node_num = 0

    def reset(self) -> None:
        """Reset the internal state to the root of the BTree."""
        self._page = self.root
        self._page_num = self._page.num
        self._node_num = 0

    def node(self) -> Node:
        """Return the node the BTree is currently on."""
        return self._page.node(self._node_num)

    def next(self) -> Node:
        """Move the BTree to the next node and return it.

        Can move the BTree to the next page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the last node of a page that has no next page.
        """
        if self._node_num + 1 > self._page.node_count - 1:
            # Already on the last node of this page, continue on the first node of the next page
            self.next_page()
        else:
            self._node_num += 1

        return self.node()

    def next_page(self) -> None:
        """Move the BTree to the first node of the next page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no next page.
        """
        next_num = self._page.next_page
        if not next_num:
            raise NoNeighbourPageError(f"{self._page} has no next page")

        # Load the page before touching the cursor, so a failed load leaves the state untouched
        page = self.esedb.page(next_num)
        self._page = page
        # Keep the page number in sync with the page, like reset() and search() do
        self._page_num = page.num
        self._node_num = 0

    def prev(self) -> Node:
        """Move the BTree to the previous node and return it.

        Can move the BTree to the previous page as a side effect.

        Raises:
            NoNeighbourPageError: If the cursor is on the first node of a page that has no previous page.
        """
        if self._node_num - 1 < 0:
            # Already on the first node of this page, continue on the last node of the previous page
            self.prev_page()
        else:
            self._node_num -= 1

        return self.node()

    def prev_page(self) -> None:
        """Move the BTree to the last node of the previous page in the tree.

        Raises:
            NoNeighbourPageError: If the current page has no previous page.
        """
        prev_num = self._page.previous_page
        if not prev_num:
            raise NoNeighbourPageError(f"{self._page} has no previous page")

        # Load the page before touching the cursor, so a failed load leaves the state untouched
        page = self.esedb.page(prev_num)
        self._page = page
        # Keep the page number in sync with the page, like reset() and search() do
        self._page_num = page.num
        self._node_num = page.node_count - 1

    def search(self, key: bytes, exact: bool = True) -> Node:
        """Search the tree for the given key.

        The descent starts at the page the BTree is currently on, so call :meth:`reset` first to search
        from the root. Branch pages are followed through the child of the node selected by
        :func:`find_node` until a non-branch page is reached.

        Moves the BTree to the matching node if there is one. Otherwise it is moved to the node that
        :func:`find_node` selects on the final page: the first node with a key greater than the requested
        key, or the last node of that page if all its keys are smaller.

        Args:
            key: The key to search for.
            exact: Whether to only return successfully on an exact match.

        Returns:
            The node the BTree has been moved to.

        Raises:
            KeyNotFoundError: If an ``exact`` match was requested but not found. The BTree has already
                been moved to the closest node when this is raised.
        """
        page = self._page
        while True:
            node = find_node(page, key)

            if page.is_branch:
                # Descend into the child page referenced by the selected branch node
                page = self.esedb.page(node.child)
            else:
                self._page = page
                self._page_num = page.num
                self._node_num = node.num
                break

        if exact and key != node.key:
            raise KeyNotFoundError(f"Can't find key: {key}")

        return self.node()
123+
124+
125+
def find_node(page: Page, key: bytes) -> Node:
    """Binary search a single page for the node belonging to ``key``.

    Args:
        page: The page to search.
        key: The key to search.

    Returns:
        On an exact match, the matching node for a non-branch page, or the node directly after it
        (clamped to the last node) for a branch page. Without an exact match, the node the search
        converges on: the first node with a key greater than ``key``, or the last node of the page.
    """
    final_idx = page.node_count - 1
    low, high = 0, final_idx

    while low < high:
        mid = (low + high) // 2
        candidate = page.node(mid)

        # BTree keys are ordered the same way Python orders bytes: first by data, then by length
        if key == candidate.key:
            if not page.is_branch:
                return candidate

            # Branch page keys appear to be non-inclusive upper bounds, so on an exact match
            # the leaf nodes for this key live under the next branch node
            return page.node(min(mid + 1, final_idx))

        if key < candidate.key:
            high = mid
        else:
            low = mid + 1

    # The search window collapsed to a single node without an exact match
    return page.node(low)

dissect/esedb/c_esedb.py

+30
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,34 @@
426426
DotNetGuid = 0x00040000, // index over GUID column according to .Net GUID sort order
427427
ImmutableStructure = 0x00080000, // Do not write to the input structures during a JetCreateIndexN call.
428428
};
429+
430+
flag IDBFLAG : uint16 {
431+
Unique = 0x0001, // Duplicate keys not allowed
432+
AllowAllNulls = 0x0002, // Make entries for NULL keys (all segments are null)
433+
AllowFirstNull = 0x0004, // First index column NULL allowed in index
434+
AllowSomeNulls = 0x0008, // Make entries for keys with some null segments
435+
NoNullSeg = 0x0010, // Don't allow a NULL key segment
436+
Primary = 0x0020, // Index is the primary index
437+
LocaleSet = 0x0040, // Index locale information (locale name) is set (JET_bitIndexUnicode was specified).
438+
Multivalued = 0x0080, // Has a multivalued segment
439+
TemplateIndex = 0x0100, // Index of a template table
440+
DerivedIndex = 0x0200, // Index derived from template table
441+
// Note that this flag is persisted, but
442+
// never used in an in-memory IDB, because
443+
// we use the template index IDB instead.
444+
LocalizedText = 0x0400, // Has a unicode text column? (code page is 1200)
445+
SortNullsHigh = 0x0800, // NULL sorts after data
446+
// Jan 2012: MSU is being removed. fidbUnicodeFixupOn should no longer be referenced.
447+
UnicodeFixupOn_Deprecated = 0x1000, // Track entries with undefined Unicode codepoints
448+
CrossProduct = 0x2000, // all combinations of multi-valued columns are indexed
449+
DisallowTruncation = 0x4000, // fail update rather than allow key truncation
450+
NestedTable = 0x8000, // combinations of multi-valued columns of same itagSequence are indexed
451+
};
452+
453+
flag IDXFLAG : uint16 {
454+
ExtendedColumns = 0x0001, // IDXSEGs are comprised of JET_COLUMNIDs, not FIDs
455+
DotNetGuid = 0x0002, // GUIDs sort according to .Net rules
456+
};
429457
""" # noqa E501
430458

431459
c_esedb = cstruct().load(esedb_def)
@@ -444,6 +472,8 @@
444472
TAGFLD_HEADER = c_esedb.TAGFLD_HEADER
445473
CODEPAGE = c_esedb.CODEPAGE
446474
COMPRESSION_SCHEME = c_esedb.COMPRESSION_SCHEME
475+
IDBFLAG = c_esedb.IDBFLAG
476+
IDXFLAG = c_esedb.IDXFLAG
447477

448478
CODEPAGE_MAP = {
449479
CODEPAGE.UNICODE: "utf-16-le",

0 commit comments

Comments
 (0)