Skip to content

Commit a78c7f4

Browse files
joost-jHoroficSchamper
authored
Add parser for unsaved Windows Notepad tabs (#540)
Co-authored-by: Stefan de Reuver <9864602+Horofic@users.noreply.github.com> Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com>
1 parent 1123da0 commit a78c7f4

22 files changed

+543
-0
lines changed

dissect/target/plugins/apps/texteditor/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
2+
from dissect.target.helpers.record import create_extended_descriptor
3+
from dissect.target.plugin import NamespacePlugin
4+
5+
# (type, name) pairs for the fields that every text editor tab record carries.
GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
    ("string", "content"),
    ("path", "path"),
    ("string", "deleted_content"),
]

# Generic tab record, extended with the user record descriptor extension.
TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
    "texteditor/tab",
    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
)
10+
11+
12+
class TexteditorPlugin(NamespacePlugin):
    """Namespace plugin that registers the ``texteditor`` namespace."""

    __namespace__ = "texteditor"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import zlib
5+
from typing import Iterator
6+
7+
from dissect.cstruct import cstruct
8+
from dissect.util.ts import wintimestamp
9+
from flow.record.fieldtypes import digest
10+
11+
from dissect.target.exceptions import UnsupportedPluginError
12+
from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
13+
from dissect.target.helpers.fsutil import TargetPath
14+
from dissect.target.helpers.record import (
15+
UnixUserRecord,
16+
WindowsUserRecord,
17+
create_extended_descriptor,
18+
)
19+
from dissect.target.plugin import export
20+
from dissect.target.plugins.apps.texteditor.texteditor import (
21+
GENERIC_TAB_CONTENTS_RECORD_FIELDS,
22+
TexteditorPlugin,
23+
)
24+
from dissect.target.target import Target
25+
26+
# Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
27+
# thread. This really helped to figure out the last missing bits and pieces
28+
# required for recovering text from these files.
29+
30+
# cstruct definitions for the Windows Notepad TabState file format. Every struct is terminated
# with `};` so that all definitions follow the same (C-style) convention.
windowstab_def = """
struct file_header {
    char        magic[2];                       // NP
    uleb128     updateNumber;                   // increases on every settings update when fileType=9,
                                                // doesn't seem to change on fileType 0 or 1
    uleb128     fileType;                       // 0 if unsaved, 1 if saved, 9 if contains settings?
};

struct tab_header_saved {
    uleb128     filePathLength;
    wchar       filePath[filePathLength];
    uleb128     fileSize;                       // likely similar to fixedSizeBlockLength
    uleb128     encoding;
    uleb128     carriageReturnType;
    uleb128     timestamp;                      // Windows Filetime format (not unix timestamp)
    char        sha256[32];
    char        unk0;
    char        unk1;
    uleb128     fixedSizeBlockLength;
    uleb128     fixedSizeBlockLengthDuplicate;
    uint8       wordWrap;                       // 1 if wordwrap enabled, 0 if disabled
    uint8       rightToLeft;
    uint8       showUnicode;
    uint8       optionsVersion;
};

struct tab_header_unsaved {
    char        unk0;
    uleb128     fixedSizeBlockLength;           // will always be 00 when unsaved because size is not yet known
    uleb128     fixedSizeBlockLengthDuplicate;  // will always be 00 when unsaved because size is not yet known
    uint8       wordWrap;                       // 1 if wordwrap enabled, 0 if disabled
    uint8       rightToLeft;
    uint8       showUnicode;
    uint8       optionsVersion;
};

struct tab_header_crc32_stub {
    char        unk1;
    char        unk2;
    char        crc32[4];
};

struct fixed_size_data_block {
    uleb128     nAdded;
    wchar       data[nAdded];
    uint8       hasRemainingVariableDataBlocks; // indicates whether after this single-data block more data will follow
    char        crc32[4];
};

struct variable_size_data_block {
    uleb128     offset;
    uleb128     nDeleted;
    uleb128     nAdded;
    wchar       data[nAdded];
    char        crc32[4];
};

struct options_v1 {
    uleb128     unk;
};

struct options_v2 {
    uleb128     unk1;                           // likely autocorrect or spellcheck
    uleb128     unk2;                           // likely autocorrect or spellcheck
};
"""
96+
97+
# (type, name) pairs for the fields that are only emitted for tabs that are saved to a file on disk.
WINDOWS_SAVED_TABS_EXTRA_FIELDS = [
    ("datetime", "modification_time"),
    ("digest", "hashes"),
    ("path", "saved_path"),
]

# Record for a tab that is not saved to disk: only the generic tab fields.
WindowsNotepadUnsavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
    "texteditor/windowsnotepad/tab/unsaved",
    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
)

# Record for a tab that is saved to disk: the generic tab fields followed by the saved-only fields.
WindowsNotepadSavedTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
    "texteditor/windowsnotepad/tab/saved",
    [*GENERIC_TAB_CONTENTS_RECORD_FIELDS, *WINDOWS_SAVED_TABS_EXTRA_FIELDS],
)

# Parser object built from the structure definitions in ``windowstab_def``.
c_windowstab = cstruct().load(windowstab_def)
110+
111+
112+
def _calc_crc32(data: bytes) -> bytes:
113+
"""Perform a CRC32 checksum on the data and return it as bytes."""
114+
return zlib.crc32(data).to_bytes(length=4, byteorder="big")
115+
116+
117+
class WindowsNotepadTab:
    """Windows notepad tab content parser.

    The tab file is parsed once, on construction. Afterwards these attributes are set:

    * ``is_saved``: ``True`` when the file header has ``fileType == 1``.
    * ``content``: the reconstructed text of the tab.
    * ``deleted_content``: text removed by the recorded deletions, or ``None`` if nothing was removed.
    * ``file_header``, ``tab_header``, ``options``: the parsed headers and the raw bytes of the options.
    * ``data_entry``: the parsed fixed size data block (only set when the file contains one).

    Args:
        file: Path to the tab state file to parse.

    Raises:
        NotImplementedError: If the tab header contains an unknown ``optionsVersion``.
    """

    def __init__(self, file: TargetPath):
        self.file = file
        self.is_saved = None
        self.content = None
        self.deleted_content = None
        self._process_tab_file()

    def __repr__(self) -> str:
        # ``content`` is None until parsing has finished, so don't call len() on it blindly.
        content_size = len(self.content) if self.content is not None else 0
        return (
            f"<{self.__class__.__name__} saved={self.is_saved} "
            f"content_size={content_size} has_deleted_content={self.deleted_content is not None}>"
        )

    def _warn_on_crc32_mismatch(self, message: str, stored: bytes, calculated: bytes) -> None:
        """Log ``message`` as a warning when the stored and the calculated checksum differ.

        ``message`` is a %-style format string that takes the file name, the stored checksum (hex)
        and the calculated checksum (hex), in that order.
        """
        if stored != calculated:
            logging.warning(message, self.file.name, stored.hex(), calculated.hex())

    def _read_options(self, fh) -> bytes:
        """Read the options that follow the tab header and return their byte representation.

        The options themselves are not used, but their bytes are part of the CRC32 checksums.

        Raises:
            NotImplementedError: If ``optionsVersion`` is not 0, 1 or 2.
        """
        # There appears to be a optionsVersion field that specifies the options that are passed.
        # At the moment of writing, it is not sure whether this specifies a version or a number of bytes
        # that is parsed, so just going with the 'optionsVersion' type for now.
        version = self.tab_header.optionsVersion
        if version == 0:
            # No options specified
            return b""
        if version == 1:
            return c_windowstab.options_v1(fh).dumps()
        if version == 2:
            return c_windowstab.options_v2(fh).dumps()

        # Raise an error, since we don't know how many bytes future optionVersions will occupy.
        # Not knowing how many bytes to parse can mess up the alignment and structs.
        raise NotImplementedError("Unknown Windows Notepad tab option version")

    def _apply_variable_blocks(self, fh, text: list[str]) -> list[str]:
        """Replay all variable size data blocks on ``text`` (in place) and return the deleted characters.

        Unfortunately, there is no way of determining how many blocks there are, so blocks are parsed
        until EOF is reached.
        """
        deleted: list[str] = []

        while True:
            try:
                block = c_windowstab.variable_size_data_block(fh)
            except EOFError:
                break

            start = block.offset

            # A block can remove characters, add characters, or (presumably, e.g. when a selection is
            # overwritten - TODO confirm against real-world samples) do both. Removal is applied first,
            # so the added characters end up at the position the removed characters occupied.
            if block.nDeleted > 0:
                end = start + block.nDeleted
                deleted.extend(text[start:end])
                del text[start:end]

            if block.nAdded > 0:
                # Checksum verification is only done for blocks that carry added characters
                self._warn_on_crc32_mismatch(
                    "CRC32 mismatch in multi-block file: %s (expected=%s, actual=%s)",
                    block.crc32,
                    _calc_crc32(block.dumps()[:-4]),
                )

                # Insert all added characters at the offset in one go. An offset beyond the end of
                # the text appends, just like inserting the characters one by one would.
                text[start:start] = block.data

        return deleted

    def _process_tab_file(self) -> None:
        """Parse a binary tab file and reconstruct the contents."""
        with self.file.open("rb") as fh:
            # Header is the same for all types
            self.file_header = c_windowstab.file_header(fh)

            # fileType: 0 is unsaved, 1 is saved, 9 is settings?
            self.is_saved = self.file_header.fileType == 1

            # Tabs can be saved to a file with a filename on disk, or unsaved (kept in the TabState folder).
            # Depending on the file's saved state, different header fields are present
            self.tab_header = (
                c_windowstab.tab_header_saved(fh) if self.is_saved else c_windowstab.tab_header_unsaved(fh)
            )

            # We don't use the options, but since they are required for the CRC32 checksum
            # we store the byte representation
            self.options = self._read_options(fh)

            # Header bytes that are part of the checksums. The first three bytes of the file header are
            # skipped: the 2-byte magic and, presumably, a single-byte updateNumber.
            checksummed_header = self.file_header.dumps()[3:] + self.tab_header.dumps() + self.options

            # If the file is not saved to disk and no fixedSizeBlockLength is present, an extra checksum stub
            # is present. So parse that first
            if not self.is_saved and self.tab_header.fixedSizeBlockLength == 0:
                # Two unknown bytes before the CRC32
                stub = c_windowstab.tab_header_crc32_stub(fh)
                self._warn_on_crc32_mismatch(
                    "CRC32 mismatch in header of file: %s (expected=%s, actual=%s)",
                    stub.crc32,
                    _calc_crc32(checksummed_header + stub.dumps()[:-4]),
                )

            # The text is kept as a list of characters, so the variable size blocks can easily
            # insert and remove characters at arbitrary offsets
            text: list[str] = []

            # If fixedSizeBlockLength in the header has a value of zero, the entire file consists of
            # variable-length blocks
            has_variable_blocks = True

            # In the case that a fixedSizeDataBlock is present, this value is set to a nonzero value
            if self.tab_header.fixedSizeBlockLength > 0:
                self.data_entry = c_windowstab.fixed_size_data_block(fh)

                # The header (minus the first three bytes) plus all data is included in the checksum
                self._warn_on_crc32_mismatch(
                    "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                    self.data_entry.crc32,
                    _calc_crc32(checksummed_header + self.data_entry.dumps()[:-4]),
                )

                text.extend(self.data_entry.data)

                # Variable size blocks only follow if hasRemainingVariableDataBlocks says so
                has_variable_blocks = self.data_entry.hasRemainingVariableDataBlocks == 1

            deleted: list[str] = []
            if has_variable_blocks:
                # Here, data is stored in variable-length blocks. This happens, for example, when several
                # additions and deletions of characters have been recorded and these changes have not been
                # 'flushed'. The blocks are replayed on top of the text of the fixed size block (if any);
                # this assumes block offsets are positions in the whole text of the tab, so that edits
                # touching the fixed size block's text end up at the right place.
                deleted = self._apply_variable_blocks(fh, text)

            self.content = "".join(text)

            # Set None if no deleted content was found
            self.deleted_content = "".join(deleted) or None
265+
266+
267+
class WindowsNotepadPlugin(TexteditorPlugin):
    """Windows notepad tab content plugin."""

    __namespace__ = "windowsnotepad"

    # Glob, relative to a user's home directory, that matches the tab state files
    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"

    def __init__(self, target: Target):
        super().__init__(target)

        # (tab file, user) pairs for every tab file found in the home directories of the target's users
        self.users_tabs: list[tuple[TargetPath, UnixUserRecord | WindowsUserRecord]] = []
        for user_details in self.target.user_details.all_with_home():
            for tab_file in user_details.home_path.glob(self.GLOB):
                # These files seem to contain information on different settings / configurations,
                # and are skipped for now
                if tab_file.name.endswith((".1.bin", ".0.bin")):
                    continue

                self.users_tabs.append((tab_file, user_details.user))

    def check_compatible(self) -> None:
        """Raise ``UnsupportedPluginError`` when no tab files were found for any user."""
        if not self.users_tabs:
            raise UnsupportedPluginError("No Windows Notepad tab files found")

    @export(record=[WindowsNotepadSavedTabRecord, WindowsNotepadUnsavedTabRecord])
    def tabs(self) -> Iterator[WindowsNotepadSavedTabRecord | WindowsNotepadUnsavedTabRecord]:
        """Return contents from Windows 11 Notepad tabs - and its deleted content if available.

        Windows Notepad application for Windows 11 is now able to restore both saved and unsaved tabs when you re-open
        the application.

        Tab files that cannot be parsed are logged as a warning and skipped.

        Resources:
            - https://github.com/fox-it/dissect.target/pull/540
            - https://github.com/JustArion/Notepad-Tabs
            - https://github.com/ogmini/Notepad-Tabstate-Buffer
            - https://github.com/ogmini/Notepad-State-Library
            - https://github.com/Nordgaren/tabstate-util
            - https://github.com/Nordgaren/tabstate-util/issues/1
            - https://medium.com/@mahmoudsoheem/new-digital-forensics-artifact-from-windows-notepad-527645906b7b

        Yields a WindowsNotepadSavedTabRecord or WindowsNotepadUnsavedTabRecord with the fields below. The
        ``hashes``, ``saved_path`` and ``modification_time`` fields are only part of WindowsNotepadSavedTabRecord:

        .. code-block:: text

            content (string): The content of the tab.
            path (path): The path to the tab file.
            deleted_content (string): The deleted content of the tab, if available.
            hashes (digest): A digest of the tab content.
            saved_path (path): The path where the tab was saved.
            modification_time (datetime): The modification time of the tab.
        """
        for file, user in self.users_tabs:
            # Parse the file. Parsing raises for formats it does not understand (e.g. NotImplementedError
            # for an unknown options version); a single such file should not prevent the tabs in the
            # remaining files from being returned.
            try:
                tab: WindowsNotepadTab = WindowsNotepadTab(file)
            except Exception as e:
                logging.warning("Unable to parse Windows Notepad tab file %s: %r", file, e)
                continue

            if tab.is_saved:
                yield WindowsNotepadSavedTabRecord(
                    content=tab.content,
                    path=tab.file,
                    deleted_content=tab.deleted_content,
                    # Only the SHA256 that is stored in the tab header is available
                    hashes=digest((None, None, tab.tab_header.sha256.hex())),
                    saved_path=tab.tab_header.filePath,
                    modification_time=wintimestamp(tab.tab_header.timestamp),
                    _target=self.target,
                    _user=user,
                )
            else:
                yield WindowsNotepadUnsavedTabRecord(
                    content=tab.content,
                    path=tab.file,
                    deleted_content=tab.deleted_content,
                    _target=self.target,
                    _user=user,
                )
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/plugins/apps/texteditor/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)