diff --git a/Python Server/app/.dockerignore b/Python-Server/app/.dockerignore similarity index 100% rename from Python Server/app/.dockerignore rename to Python-Server/app/.dockerignore diff --git a/Python Server/app/Dockerfile b/Python-Server/app/Dockerfile similarity index 100% rename from Python Server/app/Dockerfile rename to Python-Server/app/Dockerfile diff --git a/Python Server/app/NotionAI/NotionAI.py b/Python-Server/app/NotionAI/NotionAI.py similarity index 100% rename from Python Server/app/NotionAI/NotionAI.py rename to Python-Server/app/NotionAI/NotionAI.py diff --git a/Python Server/app/NotionAI/mind_structure.py b/Python-Server/app/NotionAI/mind_structure.py similarity index 100% rename from Python Server/app/NotionAI/mind_structure.py rename to Python-Server/app/NotionAI/mind_structure.py diff --git a/Python Server/app/NotionAI/property_manager/multi_tag_manager.py b/Python-Server/app/NotionAI/property_manager/multi_tag_manager.py similarity index 100% rename from Python Server/app/NotionAI/property_manager/multi_tag_manager.py rename to Python-Server/app/NotionAI/property_manager/multi_tag_manager.py diff --git a/Python Server/app/NotionAI/property_manager/property_manager.py b/Python-Server/app/NotionAI/property_manager/property_manager.py similarity index 100% rename from Python Server/app/NotionAI/property_manager/property_manager.py rename to Python-Server/app/NotionAI/property_manager/property_manager.py diff --git a/Python Server/app/NotionAI/property_manager/tag_object.py b/Python-Server/app/NotionAI/property_manager/tag_object.py similarity index 100% rename from Python Server/app/NotionAI/property_manager/tag_object.py rename to Python-Server/app/NotionAI/property_manager/tag_object.py diff --git a/Python Server/app/NotionAI/utils.py b/Python-Server/app/NotionAI/utils.py similarity index 100% rename from Python Server/app/NotionAI/utils.py rename to Python-Server/app/NotionAI/utils.py diff --git a/Python Server/app/docker-compose.yml 
b/Python-Server/app/docker-compose.yml similarity index 100% rename from Python Server/app/docker-compose.yml rename to Python-Server/app/docker-compose.yml diff --git a/Python Server/app/image_tagging/clarifai_tagging/clarifai_tagging.py b/Python-Server/app/image_tagging/clarifai_tagging/clarifai_tagging.py similarity index 100% rename from Python Server/app/image_tagging/clarifai_tagging/clarifai_tagging.py rename to Python-Server/app/image_tagging/clarifai_tagging/clarifai_tagging.py diff --git a/Python Server/app/image_tagging/image_tagging.py b/Python-Server/app/image_tagging/image_tagging.py similarity index 100% rename from Python Server/app/image_tagging/image_tagging.py rename to Python-Server/app/image_tagging/image_tagging.py diff --git a/Python Server/app/image_tagging/tensorflow_tagging/tensorflow_tagging.py b/Python-Server/app/image_tagging/tensorflow_tagging/tensorflow_tagging.py similarity index 100% rename from Python Server/app/image_tagging/tensorflow_tagging/tensorflow_tagging.py rename to Python-Server/app/image_tagging/tensorflow_tagging/tensorflow_tagging.py diff --git a/Python Server/app/notion/__init__.py b/Python-Server/app/notion/__init__.py similarity index 100% rename from Python Server/app/notion/__init__.py rename to Python-Server/app/notion/__init__.py diff --git a/Python Server/app/notion/block.py b/Python-Server/app/notion/block.py similarity index 100% rename from Python Server/app/notion/block.py rename to Python-Server/app/notion/block.py diff --git a/Python Server/app/notion/client.py b/Python-Server/app/notion/client.py similarity index 100% rename from Python Server/app/notion/client.py rename to Python-Server/app/notion/client.py diff --git a/Python Server/app/notion/collection.py b/Python-Server/app/notion/collection.py similarity index 100% rename from Python Server/app/notion/collection.py rename to Python-Server/app/notion/collection.py diff --git a/Python Server/app/notion/logger.py b/Python-Server/app/notion/logger.py 
similarity index 100% rename from Python Server/app/notion/logger.py rename to Python-Server/app/notion/logger.py diff --git a/Python Server/app/notion/maps.py b/Python-Server/app/notion/maps.py similarity index 100% rename from Python Server/app/notion/maps.py rename to Python-Server/app/notion/maps.py diff --git a/Python Server/app/notion/markdown.py b/Python-Server/app/notion/markdown.py similarity index 100% rename from Python Server/app/notion/markdown.py rename to Python-Server/app/notion/markdown.py diff --git a/Python Server/app/notion/monitor.py b/Python-Server/app/notion/monitor.py similarity index 100% rename from Python Server/app/notion/monitor.py rename to Python-Server/app/notion/monitor.py diff --git a/Python Server/app/notion/operations.py b/Python-Server/app/notion/operations.py similarity index 100% rename from Python Server/app/notion/operations.py rename to Python-Server/app/notion/operations.py diff --git a/Python Server/app/notion/records.py b/Python-Server/app/notion/records.py similarity index 100% rename from Python Server/app/notion/records.py rename to Python-Server/app/notion/records.py diff --git a/Python Server/app/notion/settings.py b/Python-Server/app/notion/settings.py similarity index 100% rename from Python Server/app/notion/settings.py rename to Python-Server/app/notion/settings.py diff --git a/Python Server/app/notion/smoke_test.py b/Python-Server/app/notion/smoke_test.py similarity index 100% rename from Python Server/app/notion/smoke_test.py rename to Python-Server/app/notion/smoke_test.py diff --git a/Python Server/app/notion/space.py b/Python-Server/app/notion/space.py similarity index 100% rename from Python Server/app/notion/space.py rename to Python-Server/app/notion/space.py diff --git a/Python Server/app/notion/store.py b/Python-Server/app/notion/store.py similarity index 100% rename from Python Server/app/notion/store.py rename to Python-Server/app/notion/store.py diff --git a/Python Server/app/notion/user.py 
b/Python-Server/app/notion/user.py similarity index 100% rename from Python Server/app/notion/user.py rename to Python-Server/app/notion/user.py diff --git a/Python Server/app/notion/utils.py b/Python-Server/app/notion/utils.py similarity index 100% rename from Python Server/app/notion/utils.py rename to Python-Server/app/notion/utils.py diff --git a/Python-Server/app/notion1/__init__.py b/Python-Server/app/notion1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Python-Server/app/notion1/block.py b/Python-Server/app/notion1/block.py new file mode 100644 index 0000000..f19784f --- /dev/null +++ b/Python-Server/app/notion1/block.py @@ -0,0 +1,884 @@ +import mimetypes +import os +import random +import requests +import time +import uuid + +from cached_property import cached_property +from copy import deepcopy + +from .logger import logger +from .maps import property_map, field_map, mapper +from .markdown import plaintext_to_notion, notion_to_plaintext +from .operations import build_operation +from .records import Record +from .settings import S3_URL_PREFIX, BASE_URL +from .utils import ( + extract_id, + now, + get_embed_link, + get_embed_data, + add_signed_prefix_as_needed, + remove_signed_prefix_as_needed, + get_by_path, +) + + +class Children(object): + + child_list_key = "content" + + def __init__(self, parent): + self._parent = parent + self._client = parent._client + + def shuffle(self): + content = self._content_list() + random.shuffle(content) + self._parent.set(self.child_list_key, content) + + def filter(self, type=None): + kids = list(self) + if type: + if isinstance(type, str): + type = BLOCK_TYPES.get(type, Block) + kids = [kid for kid in kids if isinstance(kid, type)] + return kids + + def _content_list(self): + return self._parent.get(self.child_list_key) or [] + + def _get_block(self, id): + + block = self._client.get_block(id) + + # TODO: this is needed because there seems to be a server-side race condition with setting and getting 
data + # (sometimes the data previously sent hasn't yet propagated to all DB nodes, perhaps? so it fails to load here) + i = 0 + while block is None: + i += 1 + if i > 20: + return None + time.sleep(0.1) + block = self._client.get_block(id) + + if block.get("parent_id") != self._parent.id: + block._alias_parent = self._parent.id + + return block + + def __repr__(self): + if not len(self): + return "[]" + rep = "[\n" + for child in self: + rep += " {},\n".format(repr(child)) + rep += "]" + return rep + + def __len__(self): + return len(self._content_list() or []) + + def __getitem__(self, key): + result = self._content_list()[key] + if isinstance(result, list): + return [self._get_block(id) for id in result] + else: + return self._get_block(result) + + def __delitem__(self, key): + self._get_block(self._content_list()[key]).remove() + + def __iter__(self): + return iter(self._get_block(id) for id in self._content_list()) + + def __reversed__(self): + return reversed(iter(self)) + + def __contains__(self, item): + if isinstance(item, str): + item_id = extract_id(item) + elif isinstance(item, Block): + item_id = item.id + else: + return False + return item_id in self._content_list() + + def add_new(self, block_type, child_list_key=None, **kwargs): + """ + Create a new block, add it as the last child of this parent block, and return the corresponding Block instance. + `block_type` can be either a type string, or a Block subclass. 
+ """ + + # determine the block type string from the Block class, if that's what was provided + if ( + isinstance(block_type, type) + and issubclass(block_type, Block) + and hasattr(block_type, "_type") + ): + block_type = block_type._type + elif not isinstance(block_type, str): + raise Exception( + "block_type must be a string or a Block subclass with a _type attribute" + ) + + block_id = self._client.create_record( + table="block", + parent=self._parent, + type=block_type, + child_list_key=child_list_key, + ) + + block = self._get_block(block_id) + + if kwargs: + with self._client.as_atomic_transaction(): + for key, val in kwargs.items(): + if hasattr(block, key): + setattr(block, key, val) + else: + logger.warning( + "{} does not have attribute '{}' to be set; skipping.".format( + block, key + ) + ) + + return block + + def add_alias(self, block): + """ + Adds an alias to the provided `block`, i.e. adds the block's ID to the parent's content list, + but doesn't change the block's parent_id. + """ + + # add the block to the content list of the parent + self._client.submit_transaction( + build_operation( + id=self._parent.id, + path=[self.child_list_key], + args={"id": block.id}, + command="listAfter", + ) + ) + + return self._get_block(block.id) + + +class Block(Record): + """ + Most data in Notion is stored as a "block" (including pages, and all the individual elements within a page). + These blocks have different types, and in some cases we create subclasses of this class to represent those types. + Attributes on the Block are mapped to useful attributes of the server-side data structure, as properties, so you can + get and set values on the API just by reading/writing attributes on these classes. We store a shared local cache on + the `NotionClient` object of all block data, and reference that as needed from here. Data can be refreshed from the + server using the `refresh` method. 
+ """ + + _table = "block" + + # we'll mark it as an alias if we load the Block as a child of a page that is not its parent + _alias_parent = None + + child_list_key = "content" + + type = field_map("type") + alive = field_map("alive") + + def get_browseable_url(self): + if "page" in self._type: + return BASE_URL + self.id.replace("-", "") + else: + return self.parent.get_browseable_url() + "#" + self.id.replace("-", "") + + @property + def children(self): + if not hasattr(self, "_children"): + children_ids = self.get("content", []) + self._client.refresh_records(block=children_ids) + self._children = Children(parent=self) + return self._children + + @property + def parent(self): + + if not self.is_alias: + parent_id = self.get("parent_id") + parent_table = self.get("parent_table") + else: + parent_id = self._alias_parent + parent_table = "block" + + if parent_table == "block": + return self._client.get_block(parent_id) + elif parent_table == "collection": + return self._client.get_collection(parent_id) + elif parent_table == "space": + return self._client.get_space(parent_id) + else: + return None + + @property + def space_info(self): + return self._client.post("getPublicPageData", {"blockId": self.id}).json() + + def _str_fields(self): + """ + Determines the list of fields to include in the __str__ representation. Override and extend this in subclasses. 
+ """ + fields = super()._str_fields() + # if this is a generic Block instance, include what type of block it is + if type(self) is Block: + fields.append("type") + return fields + + @property + def is_alias(self): + return not (self._alias_parent is None) + + def _get_mappers(self): + mappers = {} + for name in dir(self.__class__): + field = getattr(self.__class__, name) + if isinstance(field, mapper): + mappers[name] = field + return mappers + + def _convert_diff_to_changelist(self, difference, old_val, new_val): + + mappers = self._get_mappers() + changed_fields = set() + changes = [] + remaining = [] + content_changed = False + + for d in deepcopy(difference): + operation, path, values = d + + # normalize path + path = path if path else [] + path = path.split(".") if isinstance(path, str) else path + if operation in ["add", "remove"]: + path.append(values[0][0]) + while isinstance(path[-1], int): + path.pop() + path = ".".join(map(str, path)) + + # check whether it was content that changed + if path == "content": + content_changed = True + continue + + # check whether the value changed matches one of our mapped fields/properties + fields = [ + (name, field) + for name, field in mappers.items() + if path.startswith(field.path) + ] + if fields: + changed_fields.add(fields[0]) + continue + + remaining.append(d) + + if content_changed: + + old = deepcopy(old_val.get("content", [])) + new = deepcopy(new_val.get("content", [])) + + # track what's been added and removed + removed = set(old) - set(new) + added = set(new) - set(old) + for id in removed: + changes.append(("content_removed", "content", id)) + for id in added: + changes.append(("content_added", "content", id)) + + # ignore the added/removed items, and see whether order has changed + for id in removed: + old.remove(id) + for id in added: + new.remove(id) + if old != new: + changes.append(("content_reordered", "content", (old, new))) + + for name, field in changed_fields: + old = 
field.api_to_python(get_by_path(field.path, old_val)) + new = field.api_to_python(get_by_path(field.path, new_val)) + changes.append(("changed_field", name, (old, new))) + + return changes + super()._convert_diff_to_changelist( + remaining, old_val, new_val + ) + + def remove(self, permanently=False): + """ + Removes the node from its parent, and marks it as inactive. This corresponds to what happens in the + Notion UI when you delete a block. Note that it doesn't *actually* delete it, just orphan it, unless + `permanently` is set to True, in which case we make an extra call to hard-delete. + """ + + if not self.is_alias: + + # If it's not an alias, we actually remove the block + with self._client.as_atomic_transaction(): + + # Mark the block as inactive + self._client.submit_transaction( + build_operation( + id=self.id, path=[], args={"alive": False}, command="update" + ) + ) + + # Remove the block's ID from a list on its parent, if needed + if self.parent.child_list_key: + self._client.submit_transaction( + build_operation( + id=self.get("parent_id"), + path=[self.parent.child_list_key], + args={"id": self.id}, + command="listRemove", + table=self.get("parent_table"), + ) + ) + + if permanently: + block_id = self.id + self._client.post( + "deleteBlocks", {"blockIds": [block_id], "permanentlyDelete": True} + ) + del self._client._store._values["block"][block_id] + + else: + + # Otherwise, if it's an alias, we only remove it from the alias parent's content list + self._client.submit_transaction( + build_operation( + id=self._alias_parent, + path=["content"], + args={"id": self.id}, + command="listRemove", + ) + ) + + def move_to(self, target_block, position="last-child"): + assert isinstance( + target_block, Block + ), "target_block must be an instance of Block or one of its subclasses" + assert position in ["first-child", "last-child", "before", "after"] + + if "child" in position: + new_parent_id = target_block.id + new_parent_table = "block" + else: + 
new_parent_id = target_block.get("parent_id") + new_parent_table = target_block.get("parent_table") + + if position in ["first-child", "before"]: + list_command = "listBefore" + else: + list_command = "listAfter" + + list_args = {"id": self.id} + if position in ["before", "after"]: + list_args[position] = target_block.id + + with self._client.as_atomic_transaction(): + + # First, remove the node, before we re-insert and re-activate it at the target location + self.remove() + + if not self.is_alias: + # Set the parent_id of the moving block to the new parent, and mark it as active again + self._client.submit_transaction( + build_operation( + id=self.id, + path=[], + args={ + "alive": True, + "parent_id": new_parent_id, + "parent_table": new_parent_table, + }, + command="update", + ) + ) + else: + self._alias_parent = new_parent_id + + # Add the moving block's ID to the "content" list of the new parent + self._client.submit_transaction( + build_operation( + id=new_parent_id, + path=["content"], + args=list_args, + command=list_command, + ) + ) + + # update the local block cache to reflect the updates + self._client.refresh_records( + block=[ + self.id, + self.get("parent_id"), + target_block.id, + target_block.get("parent_id"), + ] + ) + + +class DividerBlock(Block): + + _type = "divider" + + +class ColumnListBlock(Block): + """ + Must contain only ColumnBlocks as children. + """ + + _type = "column_list" + + def evenly_space_columns(self): + with self._client.as_atomic_transaction(): + for child in self.children: + child.column_ratio = 1 / len(self.children) + + +class ColumnBlock(Block): + """ + Should be added as children of a ColumnListBlock. 
+ """ + + column_ratio = field_map("format.column_ratio") + + _type = "column" + + +class BasicBlock(Block): + + title = property_map("title") + title_plaintext = property_map( + "title", + python_to_api=plaintext_to_notion, + api_to_python=notion_to_plaintext, + markdown=False, + ) + color = field_map("format.block_color") + + def convert_to_type(self, new_type): + """ + Convert this block into another type of BasicBlock. Returns a new instance of the appropriate class. + """ + assert new_type in BLOCK_TYPES and issubclass( + BLOCK_TYPES[new_type], BasicBlock + ), "Target type must correspond to a subclass of BasicBlock" + self.type = new_type + return self._client.get_block(self.id) + + def _str_fields(self): + return super()._str_fields() + ["title"] + + +class TodoBlock(BasicBlock): + + _type = "to_do" + + checked = property_map( + "checked", + python_to_api=lambda x: "Yes" if x else "No", + api_to_python=lambda x: x == "Yes", + ) + + def _str_fields(self): + return super()._str_fields() + ["checked"] + + +class CodeBlock(BasicBlock): + + _type = "code" + + language = property_map("language") + wrap = field_map("format.code_wrap") + + +class FactoryBlock(BasicBlock): + """ + Also known as a "Template Button". The title is the button text, and the children are the templates to clone. 
+ """ + + _type = "factory" + + +class HeaderBlock(BasicBlock): + + _type = "header" + + +class SubheaderBlock(BasicBlock): + + _type = "sub_header" + + +class SubsubheaderBlock(BasicBlock): + + _type = "sub_sub_header" + + +class PageBlock(BasicBlock): + + _type = "page" + + icon = field_map( + "format.page_icon", + api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + + cover = field_map( + "format.page_cover", + api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + + locked = field_map("format.block_locked") + + def get_backlinks(self): + """ + Returns a list of blocks that referencing the current PageBlock. Note that only PageBlocks support backlinks. + """ + data = self._client.post("getBacklinksForBlock", {"blockId": self.id}).json() + backlinks = [] + for block in data.get("backlinks") or []: + mention = block.get("mentioned_from") + if not mention: + continue + block_id = mention.get("block_id") or mention.get("parent_block_id") + if block_id: + backlinks.append(self._client.get_block(block_id)) + return backlinks + + +class BulletedListBlock(BasicBlock): + + _type = "bulleted_list" + + +class NumberedListBlock(BasicBlock): + + _type = "numbered_list" + + +class ToggleBlock(BasicBlock): + + _type = "toggle" + + +class QuoteBlock(BasicBlock): + + _type = "quote" + + +class TextBlock(BasicBlock): + + _type = "text" + + +class EquationBlock(BasicBlock): + + latex = field_map( + ["properties", "title"], + python_to_api=lambda x: [[x]], + api_to_python=lambda x: x[0][0], + ) + + _type = "equation" + + +class MediaBlock(Block): + + caption = property_map("caption") + + def _str_fields(self): + return super()._str_fields() + ["caption"] + + +class EmbedBlock(MediaBlock): + + _type = "embed" + + display_source = field_map( + "format.display_source", + api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + source = property_map( + "source", 
+ api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + height = field_map("format.block_height") + full_width = field_map("format.block_full_width") + page_width = field_map("format.block_page_width") + width = field_map("format.block_width") + + def set_source_url(self, url): + self.source = remove_signed_prefix_as_needed(url) + self.display_source = get_embed_link(self.source) + + def _str_fields(self): + return super()._str_fields() + ["source"] + + +class EmbedOrUploadBlock(EmbedBlock): + + file_id = field_map(["file_ids", 0]) + + def upload_file(self, path): + + mimetype = mimetypes.guess_type(path)[0] or "text/plain" + filename = os.path.split(path)[-1] + + data = self._client.post( + "getUploadFileUrl", + {"bucket": "secure", "name": filename, "contentType": mimetype}, + ).json() + + with open(path, "rb") as f: + response = requests.put( + data["signedPutUrl"], data=f, headers={"Content-type": mimetype} + ) + response.raise_for_status() + + self.display_source = data["url"] + self.source = data["url"] + self.file_id = data["url"][len(S3_URL_PREFIX) :].split("/")[0] + + +class VideoBlock(EmbedOrUploadBlock): + + _type = "video" + + +class FileBlock(EmbedOrUploadBlock): + + size = property_map("size") + title = property_map("title") + + _type = "file" + + +class AudioBlock(EmbedOrUploadBlock): + + _type = "audio" + + +class PDFBlock(EmbedOrUploadBlock): + + _type = "pdf" + + +class ImageBlock(EmbedOrUploadBlock): + + _type = "image" + + +class BookmarkBlock(EmbedBlock): + + _type = "bookmark" + + bookmark_cover = field_map("format.bookmark_cover") + bookmark_icon = field_map("format.bookmark_icon") + description = property_map("description") + link = property_map("link") + title = property_map("title") + + def set_new_link(self, url): + self._client.post("setBookmarkMetadata", {"blockId": self.id, "url": url}) + self.refresh() + + +class LinkToCollectionBlock(MediaBlock): + + _type = "link_to_collection" + # TODO: 
add custom fields + + +class BreadcrumbBlock(MediaBlock): + + _type = "breadcrumb" + + +class CollectionViewBlock(MediaBlock): + + _type = "collection_view" + + @property + def collection(self): + collection_id = self.get("collection_id") + if not collection_id: + return None + if not hasattr(self, "_collection"): + self._collection = self._client.get_collection(collection_id) + return self._collection + + @collection.setter + def collection(self, val): + if hasattr(self, "_collection"): + del self._collection + self.set("collection_id", val.id) + + @property + def views(self): + if not hasattr(self, "_views"): + self._views = CollectionViewBlockViews(parent=self) + return self._views + + @property + def title(self): + return self.collection.name + + @title.setter + def title(self, val): + self.collection.name = val + + @property + def description(self): + return self.collection.description + + @description.setter + def description(self, val): + self.collection.description = val + + locked = field_map("format.block_locked") + + def _str_fields(self): + return super()._str_fields() + ["title", "collection"] + + +class CollectionViewBlockViews(Children): + + child_list_key = "view_ids" + + def _get_block(self, view_id): + + view = self._client.get_collection_view( + view_id, collection=self._parent.collection + ) + + i = 0 + while view is None: + i += 1 + if i > 20: + return None + time.sleep(0.1) + view = self._client.get_collection_view( + view_id, collection=self._parent.collection + ) + + return view + + def add_new(self, view_type="table"): + if not self._parent.collection: + raise Exception( + "Collection view block does not have an associated collection: {}".format( + self._parent + ) + ) + + record_id = self._client.create_record( + table="collection_view", parent=self._parent, type=view_type + ) + view = self._client.get_collection_view( + record_id, collection=self._parent._collection + ) + view.set("collection_id", self._parent._collection.id) + view_ids = 
self._parent.get(CollectionViewBlockViews.child_list_key, []) + view_ids.append(view.id) + self._parent.set(CollectionViewBlockViews.child_list_key, view_ids) + + # At this point, the view does not see to be completely initialized yet. + # Hack: wait a bit before e.g. setting a query. + # Note: temporarily disabling this sleep to see if the issue reoccurs. + # time.sleep(3) + return view + + +class CollectionViewPageBlock(CollectionViewBlock): + + icon = field_map( + "format.page_icon", + api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + + cover = field_map( + "format.page_cover", + api_to_python=add_signed_prefix_as_needed, + python_to_api=remove_signed_prefix_as_needed, + ) + + _type = "collection_view_page" + + +class FramerBlock(EmbedBlock): + + _type = "framer" + + +class TweetBlock(EmbedBlock): + + _type = "tweet" + + +class GistBlock(EmbedBlock): + + _type = "gist" + + +class DriveBlock(EmbedBlock): + + _type = "drive" + + +class FigmaBlock(EmbedBlock): + + _type = "figma" + + +class LoomBlock(EmbedBlock): + + _type = "loom" + + +class TypeformBlock(EmbedBlock): + + _type = "typeform" + + +class CodepenBlock(EmbedBlock): + + _type = "codepen" + + +class MapsBlock(EmbedBlock): + + _type = "maps" + + +class InvisionBlock(EmbedBlock): + + _type = "invision" + + +class CalloutBlock(BasicBlock): + + icon = field_map("format.page_icon") + + _type = "callout" + + +BLOCK_TYPES = { + cls._type: cls + for cls in locals().values() + if type(cls) == type and issubclass(cls, Block) and hasattr(cls, "_type") +} diff --git a/Python-Server/app/notion1/client.py b/Python-Server/app/notion1/client.py new file mode 100644 index 0000000..3d74893 --- /dev/null +++ b/Python-Server/app/notion1/client.py @@ -0,0 +1,437 @@ +import hashlib +import json +import re +import uuid + +from requests import Session, HTTPError +from requests.cookies import cookiejar_from_dict +from urllib.parse import urljoin +from requests.adapters import 
HTTPAdapter +from requests.packages.urllib3.util.retry import Retry +from getpass import getpass + +from .block import Block, BLOCK_TYPES +from .collection import ( + Collection, + CollectionView, + CollectionRowBlock, + COLLECTION_VIEW_TYPES, + TemplateBlock, +) +from .logger import logger +from .monitor import Monitor +from .operations import operation_update_last_edited, build_operation +from .settings import API_BASE_URL +from .space import Space +from .store import RecordStore +from .user import User +from .utils import extract_id, now + + +def create_session(client_specified_retry=None): + """ + retry on 502 + """ + session = Session() + if client_specified_retry: + retry = client_specified_retry + else: + retry = Retry( + 5, + backoff_factor=0.3, + status_forcelist=(502, 503, 504), + # CAUTION: adding 'POST' to this list which is not technically idempotent + method_whitelist=( + "POST", + "HEAD", + "TRACE", + "GET", + "PUT", + "OPTIONS", + "DELETE", + ), + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount("https://", adapter) + return session + + +class NotionClient(object): + """ + This is the entry point to using the API. Create an instance of this class, passing it the value of the + "token_v2" cookie from a logged-in browser session on Notion.so. Most of the methods on here are primarily + for internal use -- the main one you'll likely want to use is `get_block`. 
+ """ + + def __init__( + self, + token_v2=None, + monitor=False, + start_monitoring=False, + enable_caching=False, + cache_key=None, + email=None, + password=None, + client_specified_retry=None, + ): + self.session = create_session(client_specified_retry) + if token_v2: + self.session.cookies = cookiejar_from_dict({"token_v2": token_v2}) + else: + self._set_token(email=email, password=password) + + if enable_caching: + cache_key = cache_key or hashlib.sha256(token_v2.encode()).hexdigest() + self._store = RecordStore(self, cache_key=cache_key) + else: + self._store = RecordStore(self) + if monitor: + self._monitor = Monitor(self) + if start_monitoring: + self.start_monitoring() + else: + self._monitor = None + + self._update_user_info() + + def start_monitoring(self): + self._monitor.poll_async() + + def _fetch_guest_space_data(self, records): + """ + guest users have an empty `space` dict, so get the space_id from the `space_view` dict instead, + and fetch the space data from the getPublicSpaceData endpoint. 
+ + Note: This mutates the records dict + """ + space_id = list(records["space_view"].values())[0]["value"]["space_id"] + + space_data = self.post( + "getPublicSpaceData", {"type": "space-ids", "spaceIds": [space_id]} + ).json() + + records["space"] = { + space["id"]: {"value": space} for space in space_data["results"] + } + + + def _set_token(self, email=None, password=None): + if not email: + email = input("Enter your Notion email address:\n") + if not password: + password = getpass("Enter your Notion password:\n") + self.post("loginWithEmail", {"email": email, "password": password}).json() + + def _update_user_info(self): + records = self.post("loadUserContent", {}).json()["recordMap"] + if not records["space"]: + self._fetch_guest_space_data(records) + + self._store.store_recordmap(records) + self.current_user = self.get_user(list(records["notion_user"].keys())[0]) + self.current_space = self.get_space(list(records["space"].keys())[0]) + return records + + def get_email_uid(self): + response = self.post("getSpaces", {}).json() + return { + response[uid]["notion_user"][uid]["value"]["email"]: uid + for uid in response.keys() + } + + def set_user_by_uid(self, user_id): + self.session.headers.update({"x-notion-active-user-header": user_id}) + self._update_user_info() + + def set_user_by_email(self, email): + email_uid_dict = self.get_email_uid() + uid = email_uid_dict.get(email) + if not uid: + raise Exception( + "Requested email address {email} not found; available addresses: {available}".format( + email=email, available=list(email_uid_dict) + ) + ) + self.set_user_by_uid(uid) + + def get_top_level_pages(self): + records = self._update_user_info() + return [self.get_block(bid) for bid in records["block"].keys()] + + def get_record_data(self, table, id, force_refresh=False): + return self._store.get(table, id, force_refresh=force_refresh) + + def get_block(self, url_or_id, force_refresh=False): + """ + Retrieve an instance of a subclass of Block that maps to the 
block/page identified by the URL or ID passed in. + """ + block_id = extract_id(url_or_id) + block = self.get_record_data("block", block_id, force_refresh=force_refresh) + if not block: + return None + if block.get("parent_table") == "collection": + if block.get("is_template"): + block_class = TemplateBlock + else: + block_class = CollectionRowBlock + else: + block_class = BLOCK_TYPES.get(block.get("type", ""), Block) + return block_class(self, block_id) + + def get_collection(self, collection_id, force_refresh=False): + """ + Retrieve an instance of Collection that maps to the collection identified by the ID passed in. + """ + coll = self.get_record_data( + "collection", collection_id, force_refresh=force_refresh + ) + return Collection(self, collection_id) if coll else None + + def get_user(self, user_id, force_refresh=False): + """ + Retrieve an instance of User that maps to the notion_user identified by the ID passed in. + """ + user = self.get_record_data("notion_user", user_id, force_refresh=force_refresh) + return User(self, user_id) if user else None + + def get_space(self, space_id, force_refresh=False): + """ + Retrieve an instance of Space that maps to the space identified by the ID passed in. + """ + space = self.get_record_data("space", space_id, force_refresh=force_refresh) + return Space(self, space_id) if space else None + + def get_collection_view(self, url_or_id, collection=None, force_refresh=False): + """ + Retrieve an instance of a subclass of CollectionView that maps to the appropriate type. 
+ The `url_or_id` argument can either be the URL for a database page, or the ID of a collection_view (in which case + you must also pass the collection) + """ + # if it's a URL for a database page, try extracting the collection and view IDs + if url_or_id.startswith("http"): + match = re.search("([a-f0-9]{32})\?v=([a-f0-9]{32})", url_or_id) + if not match: + raise Exception("Invalid collection view URL") + block_id, view_id = match.groups() + collection = self.get_block( + block_id, force_refresh=force_refresh + ).collection + else: + view_id = url_or_id + assert ( + collection is not None + ), "If 'url_or_id' is an ID (not a URL), you must also pass the 'collection'" + + view = self.get_record_data( + "collection_view", view_id, force_refresh=force_refresh + ) + + return ( + COLLECTION_VIEW_TYPES.get(view.get("type", ""), CollectionView)( + self, view_id, collection=collection + ) + if view + else None + ) + + def refresh_records(self, **kwargs): + """ + The keyword arguments map table names into lists of (or singular) record IDs to load for that table. + Use `True` instead of a list to refresh all known records for that table. + """ + self._store.call_get_record_values(**kwargs) + + def refresh_collection_rows(self, collection_id): + row_ids = [row.id for row in self.get_collection(collection_id).get_rows()] + self._store.set_collection_rows(collection_id, row_ids) + + def post(self, endpoint, data): + """ + All API requests on Notion.so are done as POSTs (except the websocket communications). + """ + url = urljoin(API_BASE_URL, endpoint) + response = self.session.post(url, json=data) + if response.status_code == 400: + logger.error( + "Got 400 error attempting to POST to {}, with data: {}".format( + endpoint, json.dumps(data, indent=2) + ) + ) + raise HTTPError( + response.json().get( + "message", "There was an error (400) submitting the request." 
+ ) + ) + response.raise_for_status() + return response + + def submit_transaction(self, operations, update_last_edited=True): + + if not operations: + return + + if isinstance(operations, dict): + operations = [operations] + + if update_last_edited: + updated_blocks = set( + [op["id"] for op in operations if op["table"] == "block"] + ) + operations += [ + operation_update_last_edited(self.current_user.id, block_id) + for block_id in updated_blocks + ] + + # if we're in a transaction, just add these operations to the list; otherwise, execute them right away + if self.in_transaction(): + self._transaction_operations += operations + else: + data = {"operations": operations} + self.post("submitTransaction", data) + self._store.run_local_operations(operations) + + def query_collection(self, *args, **kwargs): + return self._store.call_query_collection(*args, **kwargs) + + def as_atomic_transaction(self): + """ + Returns a context manager that buffers up all calls to `submit_transaction` and sends them as one big transaction + when the context manager exits. + """ + return Transaction(client=self) + + def in_transaction(self): + """ + Returns True if we're currently in a transaction, otherwise False. 
+ """ + return hasattr(self, "_transaction_operations") + + def search_pages_with_parent(self, parent_id, search=""): + data = { + "query": search, + "parentId": parent_id, + "limit": 10000, + "spaceId": self.current_space.id, + } + response = self.post("searchPagesWithParent", data).json() + self._store.store_recordmap(response["recordMap"]) + return response["results"] + + def search_blocks(self, search, limit=25): + return self.search(query=search, limit=limit) + + def search( + self, + query="", + search_type="BlocksInSpace", + limit=100, + sort="Relevance", + source="quick_find", + isDeletedOnly=False, + excludeTemplates=False, + isNavigableOnly=False, + requireEditPermissions=False, + ancestors=[], + createdBy=[], + editedBy=[], + lastEditedTime={}, + createdTime={}, + ): + data = { + "type": search_type, + "query": query, + "spaceId": self.current_space.id, + "limit": limit, + "filters": { + "isDeletedOnly": isDeletedOnly, + "excludeTemplates": excludeTemplates, + "isNavigableOnly": isNavigableOnly, + "requireEditPermissions": requireEditPermissions, + "ancestors": ancestors, + "createdBy": createdBy, + "editedBy": editedBy, + "lastEditedTime": lastEditedTime, + "createdTime": createdTime, + }, + "sort": sort, + "source": source, + } + response = self.post("search", data).json() + self._store.store_recordmap(response["recordMap"]) + return [self.get_block(result["id"]) for result in response["results"]] + + def create_record(self, table, parent, **kwargs): + + # make up a new UUID; apparently we get to choose our own! 
+ record_id = str(uuid.uuid4()) + + child_list_key = kwargs.get("child_list_key") or parent.child_list_key + + args = { + "id": record_id, + "version": 1, + "alive": True, + "created_by_id": self.current_user.id, + "created_by_table": "notion_user", + "created_time": now(), + "parent_id": parent.id, + "parent_table": parent._table, + } + + args.update(kwargs) + + with self.as_atomic_transaction(): + + # create the new record + self.submit_transaction( + build_operation( + args=args, command="set", id=record_id, path=[], table=table + ) + ) + + # add the record to the content list of the parent, if needed + if child_list_key: + self.submit_transaction( + build_operation( + id=parent.id, + path=[child_list_key], + args={"id": record_id}, + command="listAfter", + table=parent._table, + ) + ) + + return record_id + + +class Transaction(object): + + is_dummy_nested_transaction = False + + def __init__(self, client): + self.client = client + + def __enter__(self): + + if hasattr(self.client, "_transaction_operations"): + # client is already in a transaction, so we'll just make this one a nullop and let the outer one handle it + self.is_dummy_nested_transaction = True + return + + self.client._transaction_operations = [] + self.client._pages_to_refresh = [] + self.client._blocks_to_refresh = [] + + def __exit__(self, exc_type, exc_value, traceback): + + if self.is_dummy_nested_transaction: + return + + operations = self.client._transaction_operations + del self.client._transaction_operations + + # only actually submit the transaction if there was no exception + if not exc_type: + self.client.submit_transaction(operations) + + self.client._store.handle_post_transaction_refreshing() diff --git a/Python-Server/app/notion1/collection.py b/Python-Server/app/notion1/collection.py new file mode 100644 index 0000000..0b381c4 --- /dev/null +++ b/Python-Server/app/notion1/collection.py @@ -0,0 +1,799 @@ +from cached_property import cached_property +from copy import deepcopy +from 
datetime import datetime, date +from tzlocal import get_localzone +from uuid import uuid1 + +from .block import Block, PageBlock, Children, CollectionViewBlock +from .logger import logger +from .maps import property_map, field_map +from .markdown import markdown_to_notion, notion_to_markdown +from .operations import build_operation +from .records import Record +from .utils import ( + add_signed_prefix_as_needed, + extract_id, + remove_signed_prefix_as_needed, + slugify, +) + + +class NotionDate(object): + + start = None + end = None + timezone = None + reminder = None + + def __init__(self, start, end=None, timezone=None, reminder=None): + self.start = start + self.end = end + self.timezone = timezone + self.reminder = reminder + + @classmethod + def from_notion(cls, obj): + if isinstance(obj, dict): + data = obj + elif isinstance(obj, list): + data = obj[0][1][0][1] + else: + return None + start = cls._parse_datetime(data.get("start_date"), data.get("start_time")) + end = cls._parse_datetime(data.get("end_date"), data.get("end_time")) + timezone = data.get("time_zone") + reminder = data.get("reminder") + return cls(start, end=end, timezone=timezone, reminder=reminder) + + @classmethod + def _parse_datetime(cls, date_str, time_str): + if not date_str: + return None + if time_str: + return datetime.strptime(date_str + " " + time_str, "%Y-%m-%d %H:%M") + else: + return datetime.strptime(date_str, "%Y-%m-%d").date() + + def _format_datetime(self, date_or_datetime): + if not date_or_datetime: + return None, None + if isinstance(date_or_datetime, datetime): + return ( + date_or_datetime.strftime("%Y-%m-%d"), + date_or_datetime.strftime("%H:%M"), + ) + else: + return date_or_datetime.strftime("%Y-%m-%d"), None + + def type(self): + name = "date" + if isinstance(self.start, datetime): + name += "time" + if self.end: + name += "range" + return name + + def to_notion(self): + if self.end: + self.start, self.end = sorted([self.start, self.end]) + + start_date, start_time = 
self._format_datetime(self.start) + end_date, end_time = self._format_datetime(self.end) + reminder = self.reminder + + if not start_date: + return [] + + data = {"type": self.type(), "start_date": start_date} + + if end_date: + data["end_date"] = end_date + + if reminder: + data["reminder"] = reminder + + if "time" in data["type"]: + data["time_zone"] = str(self.timezone or get_localzone()) + data["start_time"] = start_time or "00:00" + if end_date: + data["end_time"] = end_time or "00:00" + + return [["‣", [["d", data]]]] + + +class NotionSelect(object): + valid_colors = [ + "default", + "gray", + "brown", + "orange", + "yellow", + "green", + "blue", + "purple", + "pink", + "red", + ] + id = None + color = "default" + value = None + + def __init__(self, value, color="default"): + self.id = str(uuid1()) + self.color = self.set_color(color) + self.value = value + + def set_color(self, color): + if color not in self.valid_colors: + if self.color: + return self.color + return "default" + return color + + def to_dict(self): + return {"id": self.id, "value": self.value, "color": self.color} + + +class Collection(Record): + """ + A "collection" corresponds to what's sometimes called a "database" in the Notion UI. + """ + + _table = "collection" + + name = field_map( + "name", api_to_python=notion_to_markdown, python_to_api=markdown_to_notion + ) + description = field_map( + "description", + api_to_python=notion_to_markdown, + python_to_api=markdown_to_notion, + ) + cover = field_map("cover") + + @property + def templates(self): + if not hasattr(self, "_templates"): + template_ids = self.get("template_pages", []) + self._client.refresh_records(block=template_ids) + self._templates = Templates(parent=self) + return self._templates + + def get_schema_properties(self): + """ + Fetch a flattened list of all properties in the collection's schema. 
+ """ + properties = [] + schema = self.get("schema") + for id, item in schema.items(): + prop = {"id": id, "slug": slugify(item["name"])} + prop.update(item) + properties.append(prop) + return properties + + def check_schema_select_options(self, prop, values): + """ + Check and update the prop dict with new values + """ + schema_update = False + current_options = list([p["value"].lower() for p in prop["options"]]) + if not isinstance(values, list): + values = [values] + for v in values: + if v and v.lower() not in current_options: + schema_update = True + prop["options"].append(NotionSelect(v).to_dict()) + return schema_update, prop + + def get_schema_property(self, identifier): + """ + Look up a property in the collection's schema, by "property id" (generally a 4-char string), + or name (human-readable -- there may be duplicates, so we pick the first match we find). + """ + for prop in self.get_schema_properties(): + if identifier == prop["id"] or slugify(identifier) == prop["slug"]: + return prop + if identifier == "title" and prop["type"] == "title": + return prop + return None + + def add_row(self, update_views=True, **kwargs): + """ + Create a new empty CollectionRowBlock under this collection, and return the instance. + """ + + row_id = self._client.create_record("block", self, type="page") + row = CollectionRowBlock(self._client, row_id) + + with self._client.as_atomic_transaction(): + for key, val in kwargs.items(): + setattr(row, key, val) + + if update_views: + # make sure the new record is inserted at the end of each view + for view in self.parent.views: + if view is None or isinstance(view, CalendarView): + continue + view.set("page_sort", view.get("page_sort", []) + [row_id]) + + return row + + @property + def parent(self): + assert self.get("parent_table") == "block" + return self._client.get_block(self.get("parent_id")) + + def _get_a_collection_view(self): + """ + Get an arbitrary collection view for this collection, to allow querying. 
+ """ + parent = self.parent + assert isinstance(parent, CollectionViewBlock) + assert len(parent.views) > 0 + return parent.views[0] + + def query(self, **kwargs): + return CollectionQuery(self, self._get_a_collection_view(), **kwargs).execute() + + def get_rows(self, **kwargs): + return self.query(**kwargs) + + def _convert_diff_to_changelist(self, difference, old_val, new_val): + + changes = [] + remaining = [] + + for operation, path, values in difference: + + if path == "rows": + changes.append((operation, path, values)) + else: + remaining.append((operation, path, values)) + + return changes + super()._convert_diff_to_changelist( + remaining, old_val, new_val + ) + + +class CollectionView(Record): + """ + A "view" is a particular visualization of a collection, with a "type" (board, table, list, etc) + and filters, sort, etc. + """ + + _table = "collection_view" + + name = field_map("name") + type = field_map("type") + + @property + def parent(self): + assert self.get("parent_table", "block") + return self._client.get_block(self.get("parent_id")) + + def __init__(self, *args, collection, **kwargs): + self.collection = collection + super().__init__(*args, **kwargs) + + def build_query(self, **kwargs): + return CollectionQuery( + collection=self.collection, collection_view=self, **kwargs + ) + + def default_query(self): + return self.build_query(**self.get("query", {})) + + +class BoardView(CollectionView): + + _type = "board" + + group_by = field_map("query.group_by") + + +class TableView(CollectionView): + + _type = "table" + + +class ListView(CollectionView): + + _type = "list" + + +class CalendarView(CollectionView): + + _type = "calendar" + + def build_query(self, **kwargs): + calendar_by = self._client.get_record_data("collection_view", self._id)[ + "query2" + ]["calendar_by"] + return super().build_query(calendar_by=calendar_by, **kwargs) + + +class GalleryView(CollectionView): + + _type = "gallery" + + +def _normalize_property_name(prop_name, 
collection): + if not prop_name: + return "" + else: + prop = collection.get_schema_property(prop_name) + if not prop: + return "" + return prop["id"] + + +def _normalize_query_data(data, collection, recursing=False): + if not recursing: + data = deepcopy(data) + if isinstance(data, list): + return [ + _normalize_query_data(item, collection, recursing=True) for item in data + ] + elif isinstance(data, dict): + # convert slugs to property ids + if "property" in data: + data["property"] = _normalize_property_name(data["property"], collection) + # convert any instantiated objects into their ids + if "value" in data: + if hasattr(data["value"], "id"): + data["value"] = data["value"].id + for key in data: + data[key] = _normalize_query_data(data[key], collection, recursing=True) + return data + + +class CollectionQuery(object): + def __init__( + self, + collection, + collection_view, + search="", + type="table", + aggregate=[], + aggregations=[], + filter=[], + sort=[], + calendar_by="", + group_by="", + ): + assert not ( + aggregate and aggregations + ), "Use only one of `aggregate` or `aggregations` (old vs new format)" + self.collection = collection + self.collection_view = collection_view + self.search = search + self.type = type + self.aggregate = _normalize_query_data(aggregate, collection) + self.aggregations = _normalize_query_data(aggregations, collection) + self.filter = _normalize_query_data(filter, collection) + self.sort = _normalize_query_data(sort, collection) + self.calendar_by = _normalize_property_name(calendar_by, collection) + self.group_by = _normalize_property_name(group_by, collection) + self._client = collection._client + + def execute(self): + + result_class = QUERY_RESULT_TYPES.get(self.type, QueryResult) + + return result_class( + self.collection, + self._client.query_collection( + collection_id=self.collection.id, + collection_view_id=self.collection_view.id, + search=self.search, + type=self.type, + aggregate=self.aggregate, + 
aggregations=self.aggregations, + filter=self.filter, + sort=self.sort, + calendar_by=self.calendar_by, + group_by=self.group_by, + ), + self, + ) + + +class CollectionRowBlock(PageBlock): + @property + def is_template(self): + return self.get("is_template") + + @cached_property + def collection(self): + return self._client.get_collection(self.get("parent_id")) + + @property + def schema(self): + return [ + prop + for prop in self.collection.get_schema_properties() + if prop["type"] not in ["formula", "rollup"] + ] + + def __getattr__(self, attname): + return self.get_property(attname) + + def __setattr__(self, attname, value): + if attname.startswith("_"): + # we only allow setting of new non-property attributes that start with "_" + super().__setattr__(attname, value) + elif attname in self._get_property_slugs(): + self.set_property(attname, value) + elif slugify(attname) in self._get_property_slugs(): + self.set_property(slugify(attname), value) + elif hasattr(self, attname): + super().__setattr__(attname, value) + else: + raise AttributeError("Unknown property: '{}'".format(attname)) + + def _get_property_slugs(self): + slugs = [prop["slug"] for prop in self.schema] + if "title" not in slugs: + slugs.append("title") + return slugs + + def __dir__(self): + return self._get_property_slugs() + super().__dir__() + + def get_property(self, identifier): + + prop = self.collection.get_schema_property(identifier) + if prop is None: + raise AttributeError( + "Object does not have property '{}'".format(identifier) + ) + + val = self.get(["properties", prop["id"]]) + + return self._convert_notion_to_python(val, prop) + + def _convert_diff_to_changelist(self, difference, old_val, new_val): + + changed_props = set() + changes = [] + remaining = [] + + for d in difference: + operation, path, values = d + path = path.split(".") if isinstance(path, str) else path + if path and path[0] == "properties": + if len(path) > 1: + changed_props.add(path[1]) + else: + for item in 
values: + changed_props.add(item[0]) + else: + remaining.append(d) + + for prop_id in changed_props: + prop = self.collection.get_schema_property(prop_id) + old = self._convert_notion_to_python( + old_val.get("properties", {}).get(prop_id), prop + ) + new = self._convert_notion_to_python( + new_val.get("properties", {}).get(prop_id), prop + ) + changes.append(("prop_changed", prop["slug"], (old, new))) + + return changes + super()._convert_diff_to_changelist( + remaining, old_val, new_val + ) + + def _convert_notion_to_python(self, val, prop): + + if prop["type"] in ["title", "text"]: + val = notion_to_markdown(val) if val else "" + if prop["type"] in ["number"]: + if val is not None: + val = val[0][0] + if "." in val: + val = float(val) + else: + val = int(val) + if prop["type"] in ["select"]: + val = val[0][0] if val else None + if prop["type"] in ["multi_select"]: + val = [v.strip() for v in val[0][0].split(",")] if val else [] + if prop["type"] in ["person"]: + val = ( + [self._client.get_user(item[1][0][1]) for item in val if item[0] == "‣"] + if val + else [] + ) + if prop["type"] in ["email", "phone_number", "url"]: + val = val[0][0] if val else "" + if prop["type"] in ["date"]: + val = NotionDate.from_notion(val) + if prop["type"] in ["file"]: + val = ( + [ + add_signed_prefix_as_needed( + item[1][0][1], client=self._client, id=self.id + ) + for item in val + if item[0] != "," + ] + if val + else [] + ) + if prop["type"] in ["checkbox"]: + val = val[0][0] == "Yes" if val else False + if prop["type"] in ["relation"]: + val = ( + [ + self._client.get_block(item[1][0][1]) + for item in val + if item[0] == "‣" + ] + if val + else [] + ) + if prop["type"] in ["created_time", "last_edited_time"]: + val = self.get(prop["type"]) + val = datetime.utcfromtimestamp(val / 1000) + if prop["type"] in ["created_by", "last_edited_by"]: + val = self.get(prop["type"] + "_id") + val = self._client.get_user(val) + + return val + + def get_all_properties(self): + allprops = {} 
+ for prop in self.schema: + propid = slugify(prop["name"]) + allprops[propid] = self.get_property(propid) + return allprops + + def set_property(self, identifier, val): + + prop = self.collection.get_schema_property(identifier) + if prop is None: + raise AttributeError( + "Object does not have property '{}'".format(identifier) + ) + if prop["type"] in ["select"] or prop["type"] in ["multi_select"]: + schema_update, prop = self.collection.check_schema_select_options(prop, val) + if schema_update: + self.collection.set( + "schema.{}.options".format(prop["id"]), prop["options"] + ) + + path, val = self._convert_python_to_notion(val, prop, identifier=identifier) + + self.set(path, val) + + def _convert_python_to_notion(self, val, prop, identifier=""): + + if prop["type"] in ["title", "text"]: + if not val: + val = "" + if not isinstance(val, str): + raise TypeError( + "Value passed to property '{}' must be a string.".format(identifier) + ) + val = markdown_to_notion(val) + if prop["type"] in ["number"]: + if val is not None: + if not isinstance(val, float) and not isinstance(val, int): + raise TypeError( + "Value passed to property '{}' must be an int or float.".format( + identifier + ) + ) + val = [[str(val)]] + if prop["type"] in ["select"]: + if not val: + val = None + else: + valid_options = [p["value"].lower() for p in prop["options"]] + val = val.split(",")[0] + if val.lower() not in valid_options: + raise ValueError( + "Value '{}' not acceptable for property '{}' (valid options: {})".format( + val, identifier, valid_options + ) + ) + val = [[val]] + if prop["type"] in ["multi_select"]: + if not val: + val = [] + valid_options = [p["value"].lower() for p in prop["options"]] + if not isinstance(val, list): + val = [val] + for v in val: + if v and v.lower() not in valid_options: + raise ValueError( + "Value '{}' not acceptable for property '{}' (valid options: {})".format( + v, identifier, valid_options + ) + ) + val = [[",".join(val)]] + if prop["type"] in 
["person"]: + userlist = [] + if not isinstance(val, list): + val = [val] + for user in val: + user_id = user if isinstance(user, str) else user.id + userlist += [["‣", [["u", user_id]]], [","]] + val = userlist[:-1] + if prop["type"] in ["email", "phone_number", "url"]: + val = [[val, [["a", val]]]] + if prop["type"] in ["date"]: + if isinstance(val, date) or isinstance(val, datetime): + val = NotionDate(val) + if isinstance(val, NotionDate): + val = val.to_notion() + else: + val = [] + if prop["type"] in ["file"]: + filelist = [] + if not isinstance(val, list): + val = [val] + for url in val: + url = remove_signed_prefix_as_needed(url) + filename = url.split("/")[-1] + filelist += [[filename, [["a", url]]], [","]] + val = filelist[:-1] + if prop["type"] in ["checkbox"]: + if not isinstance(val, bool): + raise TypeError( + "Value passed to property '{}' must be a bool.".format(identifier) + ) + val = [["Yes" if val else "No"]] + if prop["type"] in ["relation"]: + pagelist = [] + if not isinstance(val, list): + val = [val] + for page in val: + if isinstance(page, str): + page = self._client.get_block(page) + pagelist += [["‣", [["p", page.id]]], [","]] + val = pagelist[:-1] + if prop["type"] in ["created_time", "last_edited_time"]: + val = int(val.timestamp() * 1000) + return prop["type"], val + if prop["type"] in ["created_by", "last_edited_by"]: + val = val if isinstance(val, str) else val.id + return prop["type"], val + + return ["properties", prop["id"]], val + + def remove(self): + # Mark the block as inactive + self._client.submit_transaction( + build_operation( + id=self.id, path=[], args={"alive": False}, command="update" + ) + ) + + +class TemplateBlock(CollectionRowBlock): + @property + def is_template(self): + return self.get("is_template") + + @is_template.setter + def is_template(self, val): + assert val is True, "Templates must have 'is_template' set to True." 
+ self.set("is_template", True) + + +class Templates(Children): + + child_list_key = "template_pages" + + def _content_list(self): + return self._parent.get(self.child_list_key) or [] + + def add_new(self, **kwargs): + + kwargs["block_type"] = "page" + kwargs["child_list_key"] = self.child_list_key + kwargs["is_template"] = True + + return super().add_new(**kwargs) + + +class QueryResult(object): + def __init__(self, collection, result, query): + self.collection = collection + self._client = collection._client + self._block_ids = self._get_block_ids(result) + self.aggregates = result.get("aggregationResults", []) + self.aggregate_ids = [ + agg.get("id") for agg in (query.aggregate or query.aggregations) + ] + self.query = query + + def _get_block_ids(self, result): + return result["blockIds"] + + def _get_block(self, id): + block = CollectionRowBlock(self._client, id) + block.__dict__["collection"] = self.collection + return block + + def get_aggregate(self, id): + for agg_id, agg in zip(self.aggregate_ids, self.aggregates): + if id == agg_id: + return agg["value"] + return None + + def __repr__(self): + if not len(self): + return "[]" + rep = "[\n" + for child in self: + rep += " {},\n".format(repr(child)) + rep += "]" + return rep + + def __len__(self): + return len(self._block_ids) + + def __getitem__(self, key): + return list(iter(self))[key] + + def __iter__(self): + return iter(self._get_block(id) for id in self._block_ids) + + def __reversed__(self): + return reversed(iter(self)) + + def __contains__(self, item): + if isinstance(item, str): + item_id = extract_id(item) + elif isinstance(item, Block): + item_id = item.id + else: + return False + return item_id in self._block_ids + + +class TableQueryResult(QueryResult): + + _type = "table" + + +class BoardQueryResult(QueryResult): + + _type = "board" + + +class CalendarQueryResult(QueryResult): + + _type = "calendar" + + def _get_block_ids(self, result): + block_ids = [] + for week in result["weeks"]: + 
block_ids += week["items"] + return block_ids + + +class ListQueryResult(QueryResult): + + _type = "list" + + +class GalleryQueryResult(QueryResult): + + _type = "gallery" + + +COLLECTION_VIEW_TYPES = { + cls._type: cls + for cls in locals().values() + if type(cls) == type and issubclass(cls, CollectionView) and hasattr(cls, "_type") +} + +QUERY_RESULT_TYPES = { + cls._type: cls + for cls in locals().values() + if type(cls) == type and issubclass(cls, QueryResult) and hasattr(cls, "_type") +} diff --git a/Python-Server/app/notion1/logger.py b/Python-Server/app/notion1/logger.py new file mode 100644 index 0000000..10e4c40 --- /dev/null +++ b/Python-Server/app/notion1/logger.py @@ -0,0 +1,43 @@ +import logging +import os + +from .settings import LOG_FILE + + +NOTIONPY_LOG_LEVEL = os.environ.get("NOTIONPY_LOG_LEVEL", "warning").lower() + +logger = logging.getLogger("notion") + + +def enable_debugging(): + set_log_level(logging.DEBUG) + + +def set_log_level(level): + logger.setLevel(level) + handler.setLevel(level) + + +if NOTIONPY_LOG_LEVEL == "disabled": + handler = logging.NullHandler() + logger.addHandler(handler) +else: + handler = logging.FileHandler(LOG_FILE) + formatter = logging.Formatter("\n%(asctime)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + logger.addHandler(handler) + + if NOTIONPY_LOG_LEVEL == "debug": + set_log_level(logging.DEBUG) + elif NOTIONPY_LOG_LEVEL == "info": + set_log_level(logging.INFO) + elif NOTIONPY_LOG_LEVEL == "warning": + set_log_level(logging.WARNING) + elif NOTIONPY_LOG_LEVEL == "error": + set_log_level(logging.ERROR) + else: + raise Exception( + "Invalid value for environment variable NOTIONPY_LOG_LEVEL: {}".format( + NOTIONPY_LOG_LEVEL + ) + ) diff --git a/Python-Server/app/notion1/maps.py b/Python-Server/app/notion1/maps.py new file mode 100644 index 0000000..6d79e98 --- /dev/null +++ b/Python-Server/app/notion1/maps.py @@ -0,0 +1,109 @@ +from inspect import signature + +from .logger import logger +from 
.markdown import markdown_to_notion, notion_to_markdown + + +class mapper(property): + def __init__(self, path, python_to_api, api_to_python, *args, **kwargs): + self.python_to_api = python_to_api + self.api_to_python = api_to_python + self.path = ( + ".".join(map(str, path)) + if isinstance(path, list) or isinstance(path, tuple) + else path + ) + super().__init__(*args, **kwargs) + + +def field_map(path, python_to_api=lambda x: x, api_to_python=lambda x: x): + """ + Returns a property that maps a Block attribute onto a field in the API data structures. + + - `path` can either be a top-level field-name, a list that specifies the key names to traverse, + or a dot-delimited string representing the same traversal. + + - `python_to_api` is a function that converts values as given in the Python layer into the + internal representation to be sent along in the API request. + + - `api_to_python` is a function that converts what is received from the API into an internal + representation to be returned to the Python layer. + """ + + if isinstance(path, str): + path = path.split(".") + + def fget(self): + kwargs = {} + if ( + "client" in signature(api_to_python).parameters + and "id" in signature(api_to_python).parameters + ): + kwargs["client"] = self._client + kwargs["id"] = self.id + return api_to_python(self.get(path), **kwargs) + + def fset(self, value): + kwargs = {} + if "client" in signature(python_to_api).parameters: + kwargs["client"] = self._client + self.set(path, python_to_api(value, **kwargs)) + + return mapper( + fget=fget, + fset=fset, + path=path, + python_to_api=python_to_api, + api_to_python=api_to_python, + ) + + +def property_map( + name, python_to_api=lambda x: x, api_to_python=lambda x: x, markdown=True +): + """ + Similar to `field_map`, except it works specifically with the data under the "properties" field + in the API's block table, and just takes a single name to specify which subkey to reference. 
+ Also, these properties all seem to use a special "embedded list" format that breaks the text + up into a sequence of chunks and associated format metadata. If `markdown` is True, we convert + this representation into commonmark-compatible markdown, and back again when saving. + """ + + def py2api(x, client=None): + kwargs = {} + if "client" in signature(python_to_api).parameters: + kwargs["client"] = client + x = python_to_api(x, **kwargs) + if markdown: + x = markdown_to_notion(x) + return x + + def api2py(x, client=None, id=""): + x = x or [[""]] + if markdown: + x = notion_to_markdown(x) + kwargs = {} + params = signature(api_to_python).parameters + if "client" in params: + kwargs["client"] = client + if "id" in params: + kwargs["id"] = id + return api_to_python(x, **kwargs) + + return field_map(["properties", name], python_to_api=py2api, api_to_python=api2py) + + +def joint_map(*mappings): + """ + Combine multiple `field_map` and `property_map` instances together to map an attribute to multiple API fields. + Note: when "getting", the first one will be used. When "setting", they will all be set in parallel. 
+ """ + + def fget(self): + return mappings[0].fget(self) + + def fset(self, value): + for m in mappings: + m.fset(self, value) + + return property(fget=fget, fset=fset) diff --git a/Python-Server/app/notion1/markdown.py b/Python-Server/app/notion1/markdown.py new file mode 100644 index 0000000..88284cc --- /dev/null +++ b/Python-Server/app/notion1/markdown.py @@ -0,0 +1,357 @@ +import commonmark +import re +import html +from xml.dom import minidom + +from commonmark.dump import prepare + + +delimiters = { + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "|", + "}", + "~", + "☃", + " ", + "\t", + "\n", + "\x0b", + "\x0c", + "\r", + "\x1c", + "\x1d", + "\x1e", + "\x1f", + "\x85", + "\xa0", + "\u1680", + "\u2000", + "\u2001", + "\u2002", + "\u2003", + "\u2004", + "\u2005", + "\u2006", + "\u2007", + "\u2008", + "\u2009", + "\u200a", + "\u2028", + "\u2029", + "\u202f", + "\u205f", + "\u3000", +} + +_NOTION_TO_MARKDOWN_MAPPER = {"i": "☃", "b": "☃☃", "s": "~~", "c": "`", "e": "$$"} + +FORMAT_PRECEDENCE = ["s", "b", "i", "a", "c", "e"] + + +def _extract_text_and_format_from_ast(item): + + if item["type"] == "html_inline": + if item.get("literal", "") == "": + return "", ("s",) + if item.get("literal", "").startswith("" + ).documentElement + equation = elem.attributes["equation"].value + return "", ("e", equation) + + if item["type"] == "emph": + return item.get("literal", ""), ("i",) + + if item["type"] == "strong": + return item.get("literal", ""), ("b",) + + if item["type"] == "code": + return item.get("literal", ""), ("c",) + + if item["type"] == "link": + return item.get("literal", ""), ("a", item.get("destination", "#")) + + return item.get("literal", ""), () + + +def _get_format(notion_segment, as_set=False): + if len(notion_segment) == 1: + if as_set: + return set() + else: + return [] + else: + if as_set: + 
def cleanup_dashes(thing):
    """Recursively replace em-dash artifacts with a plain "-".

    Replaces both the literal "⸻" character and its percent-encoded form
    "%E2%B8%BB" in strings. Lists are rewritten element-by-element IN PLACE
    (and the same list object is returned); any other type is returned
    unchanged.
    """
    regex_pattern = re.compile("⸻|%E2%B8%BB")
    # Use isinstance rather than `type(x) is T` so subclasses behave sanely.
    if isinstance(thing, list):
        for counter, value in enumerate(thing):
            thing[counter] = cleanup_dashes(value)
    elif isinstance(thing, str):
        return regex_pattern.sub("-", thing)

    return thing
2 else [] + + match = re.match( + "^(?P\s*)(?P(\s|.)*?)(?P\s*)$", text + ) + if not match: + raise Exception("Unable to extract text from: %r" % text) + + leading_whitespace = match.groupdict()["leading"] + stripped = match.groupdict()["stripped"] + trailing_whitespace = match.groupdict()["trailing"] + + markdown += leading_whitespace + + sorted_format = sorted( + format, + key=lambda x: FORMAT_PRECEDENCE.index(x[0]) + if x[0] in FORMAT_PRECEDENCE + else -1, + ) + + for f in sorted_format: + if f[0] in _NOTION_TO_MARKDOWN_MAPPER: + if stripped: + markdown += _NOTION_TO_MARKDOWN_MAPPER[f[0]] + if f[0] == "a": + markdown += "[" + + # Check wheter a format modifies the content + content_changed = False + for f in sorted_format: + if f[0] == "e": + markdown += f[1] + content_changed = True + + if not content_changed: + markdown += stripped + + for f in reversed(sorted_format): + if f[0] in _NOTION_TO_MARKDOWN_MAPPER: + if stripped: + markdown += _NOTION_TO_MARKDOWN_MAPPER[f[0]] + if f[0] == "a": + markdown += "]({})".format(f[1]) + + markdown += trailing_whitespace + + # to make it parseable, add a space after if it combines code/links and emphasis formatting + format_types = [f[0] for f in format] + if ( + ("c" in format_types or "a" in format_types) + and ("b" in format_types or "i" in format_types) + and not trailing_whitespace + ): + markdown += " " + + markdown_chunks.append(markdown) + + # use underscores as needed to separate adjacent chunks to avoid ambiguous runs of asterisks + full_markdown = "" + last_used_underscores = False + for i in range(len(markdown_chunks)): + prev = markdown_chunks[i - 1] if i > 0 else "" + curr = markdown_chunks[i] + next = markdown_chunks[i + 1] if i < len(markdown_chunks) - 1 else "" + prev_ended_in_delimiter = not prev or prev[-1] in delimiters + next_starts_with_delimiter = not next or next[0] in delimiters + if ( + prev_ended_in_delimiter + and next_starts_with_delimiter + and not last_used_underscores + and 
def notion_to_plaintext(notion, client=None):
    """Flatten Notion rich-text segments into a plain string, dropping formatting.

    Inline mentions (segments whose text is "‣") are resolved through `client`
    when one is given; otherwise they are rendered as "page:<id>" / "user:<id>"
    placeholders.
    """

    pieces = []

    for segment in notion or []:

        text = segment[0]
        formats = segment[1] if len(segment) == 2 else []

        if text == "‣":
            # Mention segment: substitute the linked page title / user name.
            for fmt in formats:
                if fmt[0] == "p":  # page link
                    if client is None:
                        pieces.append("page:" + fmt[1])
                    else:
                        pieces.append(client.get_block(fmt[1]).title_plaintext)
                elif fmt[0] == "u":  # user link
                    if client is None:
                        pieces.append("user:" + fmt[1])
                    else:
                        pieces.append(client.get_user(fmt[1]).full_name)
            continue

        pieces.append(text)

    return "".join(pieces)
def _encode_numbered_json_thing(self, data):
    """Serialize a list of dicts into the length-prefixed frames the msgstore expects.

    Each object is rendered as compact JSON prefixed by its key count, and each
    resulting message is prefixed again by its own character length, then all
    frames are concatenated and returned as bytes
    (e.g. {"a": 1} -> b'8:1{"a":1}').
    """
    assert isinstance(data, list)
    frames = []
    for obj in data:
        payload = str(len(obj)) + json.dumps(obj, separators=(",", ":"))
        frames.append("{}:{}".format(len(payload), payload))
    return "".join(frames).encode()
def post_data(self, data):
    """POST an encoded monitoring payload to the message store.

    Empty payloads are silently skipped. Uses the client's authenticated
    session and the current polling session/socket ids.
    """

    if not data:
        return

    logger.debug("Posting monitoring data: {}".format(data))

    endpoint = "{}?sessionId={}&transport=polling&sid={}".format(
        self.root_url, self.session_id, self.sid
    )
    self.client.session.post(endpoint, data=data)
def build_operation(id, path, args, command="set", table="block"):
    """
    Construct a single "operation" dict for the submitTransaction endpoint.

    Data updates sent to submitTransaction consist of a sequence of these
    operations. A dot-delimited string `path` is expanded into the list form
    the API expects; a list `path` is passed through unchanged.
    """

    segments = path.split(".") if isinstance(path, str) else path

    return {
        "id": id,
        "path": segments,
        "args": args,
        "command": command,
        "table": table,
    }


def operation_update_last_edited(user_id, block_id):
    """
    Build the operation that refreshes a block's "last edited" metadata.

    The web UI sends this alongside every transaction, so we mirror it for
    consistency: it stamps the editing user and the current time onto the
    block record.
    """
    return {
        "args": {
            "last_edited_by_id": user_id,
            "last_edited_by_table": "notion_user",
            "last_edited_time": now(),
        },
        "command": "update",
        "id": block_id,
        "path": [],
        "table": "block",
    }
def _convert_diff_to_changelist(self, difference, old_val, new_val):
    """Translate a dictdiffer-style diff into ("changed_value", path, (old, new)) tuples.

    Paths are normalized to dot-delimited strings; trailing list indices are
    stripped so each entry points at the containing field rather than an
    individual list element. Duplicate paths are collapsed.
    """
    dotted_paths = set()
    for op, raw_path, values in deepcopy(difference):
        segments = raw_path.split(".") if isinstance(raw_path, str) else raw_path
        # add/remove operations carry the affected key in their payload.
        if op in ("add", "remove"):
            segments.append(values[0][0])
        # Drop trailing list indices so we report the enclosing field.
        while isinstance(segments[-1], int):
            segments.pop()
        dotted_paths.add(".".join(map(str, segments)))
    return [
        (
            "changed_value",
            p,
            (get_by_path(p, old_val), get_by_path(p, new_val)),
        )
        for p in dotted_paths
    ]
def remove_callbacks(self, callback_or_callback_id_prefix=None):
    """Detach callbacks registered on this record.

    When called with no argument, every callback previously registered via
    `add_callback` is unregistered from the client's record store and the
    local list is cleared. Otherwise, only callbacks matching the given
    callback object or callback-id prefix are removed.
    """
    # NOTE: removed a leftover debug print() here — it concatenated the
    # argument onto a str, raising TypeError whenever this method was called
    # with its default argument (None).
    if callback_or_callback_id_prefix is None:
        # Unregister each known callback from the store, then forget them all.
        for callback_obj in list(self._callbacks):
            self._client._store.remove_callbacks(
                self._table, self.id, callback_or_callback_id_prefix=callback_obj
            )
        self._callbacks = []
    else:
        self._client._store.remove_callbacks(
            self._table,
            self.id,
            callback_or_callback_id_prefix=callback_or_callback_id_prefix,
        )
        if callback_or_callback_id_prefix in self._callbacks:
            self._callbacks.remove(callback_or_callback_id_prefix)
+ """ + self._client.submit_transaction( + build_operation(id=self.id, path=path, args=value, table=self._table) + ) + + def __eq__(self, other): + return self.id == other.id + + def __ne__(self, other): + return self.id != other.id + + def __hash__(self): + return hash(self.id) diff --git a/Python-Server/app/notion1/settings.py b/Python-Server/app/notion1/settings.py new file mode 100644 index 0000000..3878f19 --- /dev/null +++ b/Python-Server/app/notion1/settings.py @@ -0,0 +1,23 @@ +import os +from pathlib import Path + +BASE_URL = "https://www.notion.so/" +API_BASE_URL = BASE_URL + "api/v3/" +SIGNED_URL_PREFIX = "https://www.notion.so/signed/" +S3_URL_PREFIX = "https://s3-us-west-2.amazonaws.com/secure.notion-static.com/" +S3_URL_PREFIX_ENCODED = "https://s3.us-west-2.amazonaws.com/secure.notion-static.com/" +DATA_DIR = os.environ.get( + "NOTION_DATA_DIR", str(Path(os.path.expanduser("~")).joinpath(".notion-py")) +) +CACHE_DIR = str(Path(DATA_DIR).joinpath("cache")) +LOG_FILE = str(Path(DATA_DIR).joinpath("notion.log")) + +try: + os.makedirs(DATA_DIR) +except FileExistsError: + pass + +try: + os.makedirs(CACHE_DIR) +except FileExistsError: + pass diff --git a/Python-Server/app/notion1/smoke_test.py b/Python-Server/app/notion1/smoke_test.py new file mode 100644 index 0000000..d9ff048 --- /dev/null +++ b/Python-Server/app/notion1/smoke_test.py @@ -0,0 +1,281 @@ +from datetime import datetime + +from .client import * +from .block import * +from .collection import NotionDate + + +def run_live_smoke_test(token_v2, parent_page_url_or_id): + + client = NotionClient(token_v2=token_v2) + + parent_page = client.get_block(parent_page_url_or_id) + + page = parent_page.children.add_new( + PageBlock, + title="Smoke test at {}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + ) + + print("Created base smoke test page at:", page.get_browseable_url()) + + col_list = page.children.add_new(ColumnListBlock) + col1 = col_list.children.add_new(ColumnBlock) + col2 = 
col_list.children.add_new(ColumnBlock) + col1kid = col1.children.add_new( + TextBlock, title="Some formatting: *italic*, **bold**, ***both***!" + ) + assert ( + col1kid.title.replace("_", "*") + == "Some formatting: *italic*, **bold**, ***both***!" + ) + assert col1kid.title_plaintext == "Some formatting: italic, bold, both!" + col2.children.add_new(TodoBlock, title="I should be unchecked") + col2.children.add_new(TodoBlock, title="I should be checked", checked=True) + + page.children.add_new(HeaderBlock, title="The finest music:") + video = page.children.add_new(VideoBlock, width=100) + video.set_source_url("https://www.youtube.com/watch?v=oHg5SJYRHA0") + + assert video in page.children + assert col_list in page.children + assert video in page.children.filter(VideoBlock) + assert col_list not in page.children.filter(VideoBlock) + + # check that the parent does not yet consider this page to be backlinking + assert page not in parent_page.get_backlinks() + + page.children.add_new(SubheaderBlock, title="A link back to where I came from:") + alias = page.children.add_alias(parent_page) + assert alias.is_alias + assert not page.is_alias + page.children.add_new( + QuoteBlock, + title="Clicking [here]({}) should take you to the same place...".format( + page.parent.get_browseable_url() + ), + ) + + # check that the parent now knows about the backlink + assert page in parent_page.get_backlinks() + + # ensure __repr__ methods are not breaking + repr(page) + repr(page.children) + for child in page.children: + repr(child) + + page.children.add_new( + SubheaderBlock, title="The order of the following should be alphabetical:" + ) + + B = page.children.add_new(BulletedListBlock, title="B") + D = page.children.add_new(BulletedListBlock, title="D") + C2 = page.children.add_new(BulletedListBlock, title="C2") + C1 = page.children.add_new(BulletedListBlock, title="C1") + C = page.children.add_new(BulletedListBlock, title="C") + A = page.children.add_new(BulletedListBlock, title="A") 
+ + D.move_to(C, "after") + A.move_to(B, "before") + C2.move_to(C) + C1.move_to(C, "first-child") + + page.children.add_new(CalloutBlock, title="I am a callout", icon="🤞") + + cvb = page.children.add_new(CollectionViewBlock) + cvb.collection = client.get_collection( + client.create_record("collection", parent=cvb, schema=get_collection_schema()) + ) + cvb.title = "My data!" + view = cvb.views.add_new(view_type="table") + + special_code = uuid.uuid4().hex[:8] + + # add a row + row1 = cvb.collection.add_row() + assert row1.person == [] + row1.name = "Just some data" + row1.title = "Can reference 'title' field too! " + special_code + assert row1.name == row1.title + row1.check_yo_self = True + row1.estimated_value = None + row1.estimated_value = 42 + row1.files = [ + "https://www.birdlife.org/sites/default/files/styles/1600/public/slide.jpg" + ] + row1.tags = None + row1.tags = [] + row1.tags = ["A", "C"] + row1.where_to = "https://learningequality.org" + row1.category = "A" + row1.category = "" + row1.category = None + row1.category = "B" + + start = datetime.strptime("2020-01-01 09:30", "%Y-%m-%d %H:%M") + end = datetime.strptime("2020-01-05 20:45", "%Y-%m-%d %H:%M") + timezone = "America/Los_Angeles" + reminder = {"unit": "minute", "value": 30} + row1.some_date = NotionDate(start, end=end, timezone=timezone, reminder=reminder) + + # add another row + row2 = cvb.collection.add_row(person=client.current_user, title="Metallic penguins") + assert row2.person == [client.current_user] + assert row2.name == "Metallic penguins" + row2.check_yo_self = False + row2.estimated_value = 22 + row2.files = [ + "https://www.picclickimg.com/d/l400/pict/223603662103_/Vintage-Small-Monet-and-Jones-JNY-Enamel-Metallic.jpg" + ] + row2.tags = ["A", "B"] + row2.where_to = "https://learningequality.org" + row2.category = "C" + + # check that options "C" have been added to the schema + for prop in ["=d{|", "=d{q"]: + assert cvb.collection.get("schema.{}.options.2.value".format(prop)) == "C" 
+ + # check that existing options "A" haven't been affected + for prop in ["=d{|", "=d{q"]: + assert ( + cvb.collection.get("schema.{}.options.0.id".format(prop)) + == get_collection_schema()[prop]["options"][0]["id"] + ) + + # Run a filtered/sorted query using the view's default parameters + result = view.default_query().execute() + assert row1 == result[0] + assert row2 == result[1] + assert len(result) == 2 + + # query the collection directly + assert row1 in cvb.collection.get_rows(search=special_code) + assert row2 not in cvb.collection.get_rows(search=special_code) + assert row1 not in cvb.collection.get_rows(search="penguins") + assert row2 in cvb.collection.get_rows(search="penguins") + + # search the entire space + assert row1 in client.search_blocks(search=special_code) + assert row1 not in client.search_blocks(search="penguins") + assert row2 not in client.search_blocks(search=special_code) + assert row2 in client.search_blocks(search="penguins") + + # Run an "aggregation" query + aggregations = [ + {"property": "estimated_value", "aggregator": "sum", "id": "total_value"} + ] + result = view.build_query(aggregations=aggregations).execute() + assert result.get_aggregate("total_value") == 64 + + # Run a "filtered" query + filter_params = { + "filters": [ + { + "filter": { + "value": { + "type": "exact", + "value": {"table": "notion_user", "id": client.current_user.id}, + }, + "operator": "person_does_not_contain", + }, + "property": "person", + } + ], + "operator": "and", + } + result = view.build_query(filter=filter_params).execute() + assert row1 in result + assert row2 not in result + + # Run a "sorted" query + sort_params = [{"direction": "ascending", "property": "estimated_value"}] + result = view.build_query(sort=sort_params).execute() + assert row1 == result[1] + assert row2 == result[0] + + # Test that reminders and time zone's work properly + row1.refresh() + assert row1.some_date.start == start + assert row1.some_date.end == end + assert 
def get_collection_schema():
    """Return the fixed collection schema used by the live smoke test.

    Keys are Notion's opaque 4-character property ids (plus the special
    "title" id); values describe each property's display name, type, and —
    for select/multi_select — their preset options.
    """
    tag_options = [
        {
            "color": "orange",
            "id": "79560dab-c776-43d1-9420-27f4011fcaec",
            "value": "A",
        },
        {
            "color": "default",
            "id": "002c7016-ac57-413a-90a6-64afadfb0c44",
            "value": "B",
        },
    ]
    category_options = [
        {
            "color": "orange",
            "id": "59560dab-c776-43d1-9420-27f4011fcaec",
            "value": "A",
        },
        {
            "color": "default",
            "id": "502c7016-ac57-413a-90a6-64afadfb0c44",
            "value": "B",
        },
    ]
    return {
        "%9:q": {"name": "Check Yo'self", "type": "checkbox"},
        "=d{|": {"name": "Tags", "type": "multi_select", "options": tag_options},
        "=d{q": {"name": "Category", "type": "select", "options": category_options},
        "LL[(": {"name": "Person", "type": "person"},
        "4Jv$": {"name": "Estimated value", "type": "number"},
        "OBcJ": {"name": "Where to?", "type": "url"},
        "TwR:": {"name": "Some Date", "type": "date"},
        "dV$q": {"name": "Files", "type": "file"},
        "title": {"name": "Name", "type": "title"},
    }
def add_page(self, title, type="page", shared=False):
    """Create a new top-level page in this space and return its block.

    `type` must be "page" or "collection_view_page". A shared page is
    editable by everyone in the space; otherwise only the current user is
    granted editor permission.
    """
    assert type in [
        "page",
        "collection_view_page",
    ], "'type' must be one of 'page' or 'collection_view_page'"
    permissions = (
        [{"role": "editor", "type": "space_permission"}]
        if shared
        else [
            {
                "role": "editor",
                "type": "user_permission",
                "user_id": self._client.current_user.id,
            }
        ]
    )
    new_id = self._client.create_record(
        "block", self, type=type, permissions=permissions
    )
    new_page = self._client.get_block(new_id)
    new_page.title = title
    return new_page
class Callback(object):
    """Wraps a user-supplied callable fired when a watched record changes.

    Instances compare equal to other Callbacks with the same `callback_id`,
    and to any string that is a prefix of their `callback_id` (used for
    prefix-based removal from the record store).
    """

    def __init__(
        self, callback, record, callback_id=None, extra_kwargs=None, watch_children=True
    ):
        self.callback = callback
        self.record = record
        # Generate a unique id when the caller doesn't supply one.
        self.callback_id = callback_id or str(uuid.uuid4())
        # FIX: was a mutable default argument (extra_kwargs={}), which shared
        # one dict across all instances created with the default; use a
        # None sentinel and allocate a fresh dict per instance instead.
        self.extra_kwargs = extra_kwargs if extra_kwargs is not None else {}

    def __call__(self, difference, old_val, new_val):
        kwargs = {}
        kwargs.update(self.extra_kwargs)
        kwargs["record"] = self.record
        kwargs["callback_id"] = self.callback_id
        kwargs["difference"] = difference
        kwargs["changes"] = self.record._convert_diff_to_changelist(
            difference, old_val, new_val
        )

        logger.debug("Firing callback {} with kwargs: {}".format(self.callback, kwargs))

        # trim down the parameters we'll be passing, to include only those the callback will accept
        params = signature(self.callback).parameters
        if not any("**" in str(param) for param in params.values()):
            # there's no "**kwargs" in the callback signature, so remove any unaccepted params
            for arg in list(kwargs.keys()):
                if arg not in params:
                    del kwargs[arg]

        # perform the callback, gracefully handling any exceptions
        try:
            # trigger the callback within its own thread, so it won't block others if it's long-running
            threading.Thread(target=self.callback, kwargs=kwargs, daemon=True).start()
        except Exception as e:
            logger.error(
                "Error while processing callback for {}: {}".format(
                    repr(self.record), repr(e)
                )
            )

    def __eq__(self, val):
        if isinstance(val, str):
            return self.callback_id.startswith(val)
        elif isinstance(val, Callback):
            return self.callback_id == val.callback_id
        else:
            return False
self._pages_to_refresh = [] + with self._mutex: + self._load_cache() + + def _get(self, table, id): + return self._values[table].get(id, Missing) + + def add_callback(self, record, callback, callback_id=None, extra_kwargs={}): + assert callable( + callback + ), "The callback must be a 'callable' object, such as a function." + self.remove_callbacks(record._table, record.id, callback_id) + callback_obj = Callback( + callback, record, callback_id=callback_id, extra_kwargs=extra_kwargs + ) + self._callbacks[record._table][record.id].append(callback_obj) + return callback_obj + + def remove_callbacks(self, table, id, callback_or_callback_id_prefix=""): + """ + Remove all callbacks for the record specified by `table` and `id` that have a callback_id + starting with the string `callback_or_callback_id_prefix`, or are equal to the provided callback. + """ + if callback_or_callback_id_prefix is None: + return + callbacks = self._callbacks[table][id] + while callback_or_callback_id_prefix in callbacks: + callbacks.remove(callback_or_callback_id_prefix) + + def _get_cache_path(self, attribute): + return str( + Path(CACHE_DIR).joinpath("{}{}.json".format(self._cache_key, attribute)) + ) + + def _load_cache(self, attributes=("_values", "_role", "_collection_row_ids")): + if not self._cache_key: + return + for attr in attributes: + try: + with open(self._get_cache_path(attr)) as f: + if attr == "_collection_row_ids": + self._collection_row_ids.update(json.load(f)) + else: + for k, v in json.load(f).items(): + getattr(self, attr)[k].update(v) + except (FileNotFoundError, ValueError): + pass + + def set_collection_rows(self, collection_id, row_ids): + + if collection_id in self._collection_row_ids: + old_ids = set(self._collection_row_ids[collection_id]) + new_ids = set(row_ids) + added = new_ids - old_ids + removed = old_ids - new_ids + for id in added: + self._trigger_callbacks( + "collection", + collection_id, + [("row_added", "rows", id)], + old_ids, + new_ids, + ) + for id in 
removed: + self._trigger_callbacks( + "collection", + collection_id, + [("row_removed", "rows", id)], + old_ids, + new_ids, + ) + self._collection_row_ids[collection_id] = row_ids + self._save_cache("_collection_row_ids") + + def get_collection_rows(self, collection_id): + return self._collection_row_ids.get(collection_id, []) + + def _save_cache(self, attribute): + if not self._cache_key: + return + with open(self._get_cache_path(attribute), "w") as f: + json.dump(getattr(self, attribute), f) + + def _trigger_callbacks(self, table, id, difference, old_val, new_val): + for callback_obj in self._callbacks[table][id]: + callback_obj(difference, old_val, new_val) + + def get_role(self, table, id, force_refresh=False): + self.get(table, id, force_refresh=force_refresh) + return self._role[table].get(id, None) + + def get(self, table, id, force_refresh=False): + id = extract_id(id) + # look up the record in the current local dataset + result = self._get(table, id) + # if it's not found, try refreshing the record from the server + if result is Missing or force_refresh: + if table == "block": + self.call_load_page_chunk(id) + else: + self.call_get_record_values(**{table: id}) + result = self._get(table, id) + return result if result is not Missing else None + + def _update_record(self, table, id, value=None, role=None): + + callback_queue = [] + + with self._mutex: + if role: + logger.debug("Updating 'role' for {}/{} to {}".format(table, id, role)) + self._role[table][id] = role + self._save_cache("_role") + if value: + logger.debug( + "Updating 'value' for {}/{} to {}".format(table, id, value) + ) + old_val = self._values[table][id] + difference = list( + diff( + old_val, + value, + ignore=["version", "last_edited_time", "last_edited_by"], + expand=True, + ) + ) + self._values[table][id] = value + self._save_cache("_values") + if old_val and difference: + logger.debug("Value changed! 
Difference: {}".format(difference)) + callback_queue.append((table, id, difference, old_val, value)) + + # run callbacks outside the mutex to avoid lockups + for cb in callback_queue: + self._trigger_callbacks(*cb) + + def call_get_record_values(self, **kwargs): + """ + Call the server's getRecordValues endpoint to update the local record store. The keyword arguments map + table names into lists of (or singular) record IDs to load for that table. Use True to refresh all known + records for that table. + """ + + requestlist = [] + + for table, ids in kwargs.items(): + + # ensure "ids" is a proper list + if ids is True: + ids = list(self._values.get(table, {}).keys()) + if isinstance(ids, str): + ids = [ids] + + # if we're in a transaction, add the requested IDs to a queue to refresh when the transaction completes + if self._client.in_transaction(): + self._records_to_refresh[table] = list( + set(self._records_to_refresh.get(table, []) + ids) + ) + continue + + requestlist += [{"table": table, "id": extract_id(id)} for id in ids] + + if requestlist: + logger.debug( + "Calling 'getRecordValues' endpoint for requests: {}".format( + requestlist + ) + ) + results = self._client.post( + "getRecordValues", {"requests": requestlist} + ).json()["results"] + for request, result in zip(requestlist, results): + self._update_record( + request["table"], + request["id"], + value=result.get("value"), + role=result.get("role"), + ) + + def get_current_version(self, table, id): + values = self._get(table, id) + if values and "version" in values: + return values["version"] + else: + return -1 + + def call_load_page_chunk(self, page_id): + + if self._client.in_transaction(): + self._pages_to_refresh.append(page_id) + return + + data = { + "pageId": page_id, + "limit": 100, + "cursor": {"stack": []}, + "chunkNumber": 0, + "verticalColumns": False, + } + + recordmap = self._client.post("loadPageChunk", data).json()["recordMap"] + + self.store_recordmap(recordmap) + + def 
store_recordmap(self, recordmap): + for table, records in recordmap.items(): + if not isinstance(records, dict): + continue + for id, record in records.items(): + if not isinstance(record, dict): + continue + self._update_record( + table, id, value=record.get("value"), role=record.get("role") + ) + + def call_query_collection( + self, + collection_id, + collection_view_id, + search="", + type="table", + aggregate=[], + aggregations=[], + filter={}, + sort=[], + calendar_by="", + group_by="", + ): + + assert not ( + aggregate and aggregations + ), "Use only one of `aggregate` or `aggregations` (old vs new format)" + + # convert singletons into lists if needed + if isinstance(aggregate, dict): + aggregate = [aggregate] + if isinstance(sort, dict): + sort = [sort] + + data = { + "collectionId": collection_id, + "collectionViewId": collection_view_id, + "loader": { + "limit": 10000, + "loadContentCover": True, + "searchQuery": search, + "userLocale": "en", + "userTimeZone": str(get_localzone()), + "type": type, + }, + "query": { + "aggregate": aggregate, + "aggregations": aggregations, + "filter": filter, + "sort": sort, + }, + } + + response = self._client.post("queryCollection", data).json() + + self.store_recordmap(response["recordMap"]) + + return response["result"] + + def handle_post_transaction_refreshing(self): + + for block_id in self._pages_to_refresh: + self.call_load_page_chunk(block_id) + self._pages_to_refresh = [] + + self.call_get_record_values(**self._records_to_refresh) + self._records_to_refresh = {} + + def run_local_operations(self, operations): + """ + Called to simulate the results of running the operations on the server, to keep the record store in sync + even when we haven't completed a refresh (or we did a refresh but the database hadn't actually updated yet...) 
+ """ + for operation in operations: + self.run_local_operation(**operation) + + def run_local_operation(self, table, id, path, command, args): + + with self._mutex: + path = deepcopy(path) + new_val = deepcopy(self._values[table][id]) + + ref = new_val + + # loop and descend down the path until it's consumed, or if we're doing a "set", there's one key left + while (len(path) > 1) or (path and command != "set"): + comp = path.pop(0) + if comp not in ref: + ref[comp] = [] if "list" in command else {} + ref = ref[comp] + + if command == "update": + assert isinstance(ref, dict) + ref.update(args) + elif command == "set": + assert isinstance(ref, dict) + if path: + ref[path[0]] = args + else: + # this is the case of "setting the top level" (i.e. creating a record) + ref.clear() + ref.update(args) + elif command == "listAfter": + assert isinstance(ref, list) + if "after" in args: + ref.insert(ref.index(args["after"]) + 1, args["id"]) + else: + ref.append(args["id"]) + elif command == "listBefore": + assert isinstance(ref, list) + if "before" in args: + ref.insert(ref.index(args["before"]), args["id"]) + else: + ref.insert(0, args["id"]) + elif command == "listRemove": + try: + ref.remove(args["id"]) + except ValueError: + pass + + self._update_record(table, id, value=new_val) diff --git a/Python-Server/app/notion1/user.py b/Python-Server/app/notion1/user.py new file mode 100644 index 0000000..f216405 --- /dev/null +++ b/Python-Server/app/notion1/user.py @@ -0,0 +1,21 @@ +from .logger import logger +from .maps import property_map, field_map +from .records import Record + + +class User(Record): + + _table = "notion_user" + + given_name = field_map("given_name") + family_name = field_map("family_name") + email = field_map("email") + locale = field_map("locale") + time_zone = field_map("time_zone") + + @property + def full_name(self): + return " ".join([self.given_name or "", self.family_name or ""]).strip() + + def _str_fields(self): + return super()._str_fields() + 
["email", "full_name"] diff --git a/Python-Server/app/notion1/utils.py b/Python-Server/app/notion1/utils.py new file mode 100644 index 0000000..6f4d18f --- /dev/null +++ b/Python-Server/app/notion1/utils.py @@ -0,0 +1,108 @@ +import requests +import uuid + +from bs4 import BeautifulSoup +from urllib.parse import urlparse, parse_qs, quote_plus, unquote_plus +from datetime import datetime +from slugify import slugify as _dash_slugify + +from .settings import BASE_URL, SIGNED_URL_PREFIX, S3_URL_PREFIX, S3_URL_PREFIX_ENCODED + + +class InvalidNotionIdentifier(Exception): + pass + + +def now(): + return int(datetime.now().timestamp() * 1000) + + +def extract_id(url_or_id): + """ + Extract the block/page ID from a Notion.so URL -- if it's a bare page URL, it will be the + ID of the page. If there's a hash with a block ID in it (from clicking "Copy Link") on a + block in a page), it will instead be the ID of that block. If it's already in ID format, + it will be passed right through. + """ + input_value = url_or_id + if url_or_id.startswith(BASE_URL): + url_or_id = ( + url_or_id.split("#")[-1] + .split("/")[-1] + .split("&p=")[-1] + .split("?")[0] + .split("-")[-1] + ) + try: + return str(uuid.UUID(url_or_id)) + except ValueError: + raise InvalidNotionIdentifier(input_value) + + +def get_embed_data(source_url): + + return requests.get( + "https://api.embed.ly/1/oembed?key=421626497c5d4fc2ae6b075189d602a2&url={}".format( + source_url + ) + ).json() + + +def get_embed_link(source_url): + + data = get_embed_data(source_url) + + if "html" not in data: + return source_url + + url = list(BeautifulSoup(data["html"], "html.parser").children)[0]["src"] + + return parse_qs(urlparse(url).query)["src"][0] + + +def add_signed_prefix_as_needed(url, client=None, id=""): + + if url is None: + return + + if url.startswith(S3_URL_PREFIX): + url = SIGNED_URL_PREFIX + quote_plus(url) + "?table=block&id=" + id + if client: + url = client.session.head(url).headers.get("Location") + + return 
url + + +def remove_signed_prefix_as_needed(url): + if url is None: + return + if url.startswith(SIGNED_URL_PREFIX): + return unquote_plus(url[len(S3_URL_PREFIX) :]) + elif url.startswith(S3_URL_PREFIX_ENCODED): + parsed = urlparse(url.replace(S3_URL_PREFIX_ENCODED, S3_URL_PREFIX)) + return "{}://{}{}".format(parsed.scheme, parsed.netloc, parsed.path) + else: + return url + + +def slugify(original): + return _dash_slugify(original).replace("-", "_") + + +def get_by_path(path, obj, default=None): + + if isinstance(path, str): + path = path.split(".") + + value = obj + + # try to traverse down the sequence of keys defined in the path, to get the target value if it exists + try: + for key in path: + if isinstance(value, list): + key = int(key) + value = value[key] + except (KeyError, TypeError, IndexError): + value = default + + return value diff --git a/Python Server/app/requirements.txt b/Python-Server/app/requirements.txt similarity index 100% rename from Python Server/app/requirements.txt rename to Python-Server/app/requirements.txt diff --git a/Python Server/app/server.py b/Python-Server/app/server.py similarity index 100% rename from Python Server/app/server.py rename to Python-Server/app/server.py diff --git a/Python Server/app/static/error.html b/Python-Server/app/static/error.html similarity index 100% rename from Python Server/app/static/error.html rename to Python-Server/app/static/error.html diff --git a/Python Server/app/static/icon.png b/Python-Server/app/static/icon.png similarity index 100% rename from Python Server/app/static/icon.png rename to Python-Server/app/static/icon.png diff --git a/Python Server/app/static/thank_you.html b/Python-Server/app/static/thank_you.html similarity index 100% rename from Python Server/app/static/thank_you.html rename to Python-Server/app/static/thank_you.html diff --git a/Python Server/app/static/version.cfg b/Python-Server/app/static/version.cfg similarity index 100% rename from Python Server/app/static/version.cfg 
rename to Python-Server/app/static/version.cfg diff --git a/Python Server/app/templates/options.html b/Python-Server/app/templates/options.html similarity index 100% rename from Python Server/app/templates/options.html rename to Python-Server/app/templates/options.html diff --git a/Python Server/app/translation/translation_manager.py b/Python-Server/app/translation/translation_manager.py similarity index 100% rename from Python Server/app/translation/translation_manager.py rename to Python-Server/app/translation/translation_manager.py diff --git a/Python Server/app/utils/check_update.py b/Python-Server/app/utils/check_update.py similarity index 100% rename from Python Server/app/utils/check_update.py rename to Python-Server/app/utils/check_update.py diff --git a/Python Server/app/utils/custom_errors.py b/Python-Server/app/utils/custom_errors.py similarity index 100% rename from Python Server/app/utils/custom_errors.py rename to Python-Server/app/utils/custom_errors.py diff --git a/Python-Server/app/utils/lang_utils.py b/Python-Server/app/utils/lang_utils.py new file mode 100644 index 0000000..c4529cd --- /dev/null +++ b/Python-Server/app/utils/lang_utils.py @@ -0,0 +1,17 @@ +def get_response_text(status_code): + print("Sending response {}".format(status_code)) + + if status_code == 200: + return "Added to your mind.", "success" + elif status_code == 404: + return "No Notion credentials where provided. 
Please add them on the server.", "error" + elif status_code == 204: + return "Title and Url was modified correctly", "success" + elif status_code == 205: + return "Tags updated succesfully", "success" + elif status_code == 429: + return "Too much requests to Notion API", "error" + elif status_code == 400: + return "Invalid url provided", "error" + else: + return "Invalid url or text was provided.", "error" diff --git a/Python Server/app/utils/utils.py b/Python-Server/app/utils/utils.py similarity index 100% rename from Python Server/app/utils/utils.py rename to Python-Server/app/utils/utils.py diff --git a/README.md b/README.md index 8cb7b9e..4163976 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ collections + collections ### Search @@ -71,6 +72,8 @@ The idea is to have extensions for your mind on the browser, and app on android Right now, there's a working Python Local Server, that receives all the data from the extension and the app, and publishes it to your fully customizable and searchable database in Notion. So it is 100% open source and fully private! +Maybe we can say it is an Open Source Alternative to [Raindrop](https://raindrop.io/) and [Microsoft Edge Collections](https://support.microsoft.com/en-us/microsoft-edge/organize-your-ideas-with-collections-in-microsoft-edge-60fd7bba-6cfd-00b9-3787-b197231b507e), but much cooler with Community driven opinion and AI Capabilities, and a repo maintainer with lot of imagination (yes my brain goes at 150% speed)! + ## Examples of what you can do. Add text to your mind | Add images to your mind diff --git a/setup.sh b/setup.sh index ba3cce8..010680f 100644 --- a/setup.sh +++ b/setup.sh @@ -52,7 +52,7 @@ fi git clone https://github.com/elblogbruno/NotionAI-MyMind -cd NotionAI-MyMind && pip -r install requirements.txt +cd NotionAI-MyMind/Python-Server/app && pip -r install requirements.txt if [ "$?" = "1" ] then